def execute_interpolateBiomassFromAverages(self,experiment_id_I, sample_ids_I=[]):
     '''Interpolate the OD600 from the averaged growth-rate regression (hr-1) and the time the sample was taken,
     for samples in the experiment that do not have a measured OD600 value
     Use cases:
     1. a replicate is bad'''
     
     calc = calculate_interface();
     data = [];
     #query sample_ids for the experiment that do not have an OD600 but have a time
     print('execute interpolate biomass from averages...')
     if sample_ids_I:
         sample_ids = sample_ids_I;
     else:
         sample_ids = self.get_sampleIDs_experimentIDNoOD600_samplePhysiologicalParameters(experiment_id_I)
     for si in sample_ids:
         print('interpolating biomass from averages for sample_id ' + si);
         #query rate parameters for the sample_id
         slope_average, intercept_average, rate_average, rate_units, rate_var = None,None,None,None,None;
         slope_average, intercept_average, rate_average, rate_units, rate_var = self.get_rateData_experimentIDAndSampleIDAndMetID_dataStage01PhysiologyRatesAverages(experiment_id_I,si,'biomass');
         #query physiological parameters for the sample_id
         pp = {};
         pp = self.get_physiologicalParameters_experimentIDAndSampleID_samplePhysiologicalParameters(experiment_id_I,si)
         #query sample_date
         sample_date = None;
         sample_date = self.get_sampleDate_experimentIDAndSampleID_sampleDescription(experiment_id_I,si);
         #interpolate based on the regression parameters
         time = sample_date.year*8765.81277 + sample_date.month*730.484 + sample_date.day*24. + sample_date.hour + sample_date.minute / 60. + sample_date.second / 3600.; #convert the datetime to cumulative hours
         biomass = exp(time*slope_average+intercept_average);
         #update sample_physiologicalParameters
         pp['od600'] = biomass;
         data.append(pp);  
     self.update_data_samplePhysiologicalParameters(data)
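
# The interpolation above assumes the averaged regression was fit in ln space,
# i.e., ln(OD600) = slope*t + intercept with t in cumulative hours. A minimal,
# self-contained sketch of that step (the helper names here are illustrative,
# not from the source):
from datetime import datetime
from math import exp

def datetime_to_hours(dt):
    """Convert a datetime to cumulative hours (year and month are approximated)."""
    return (dt.year*8765.81277 + dt.month*730.484 + dt.day*24.
            + dt.hour + dt.minute/60. + dt.second/3600.)

def interpolate_od600(sample_date, slope_average, intercept_average):
    """Evaluate the fitted exponential growth curve at the sampling time."""
    t = datetime_to_hours(sample_date)
    return exp(slope_average*t + intercept_average)

# e.g., interpolate_od600(datetime(2014, 4, 15, 15, 51), slope_average, intercept_average)
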
 def execute_calculateBiomassFromBrothAverage(self,experiment_id_I, sample_ids_I=[]):
     '''Calculate the OD600 as the average OD600 of the broth samples,
     for samples in the experiment that do not have a measured OD600 value
     Use cases:
     1. the sample is a Filtrate'''
     
     calc = calculate_interface();
     data = [];
     #query sample_ids for the experiment that do not have an OD600
     print('execute calculate biomass from broth averages...')
     if sample_ids_I:
         sample_ids = sample_ids_I;
     else:
         sample_ids = self.get_sampleIDs_experimentIDAndSampleDescriptionNoOD600_samplePhysiologicalParameters(experiment_id_I,'Filtrate')
     for si in sample_ids:
         print('calculating biomass from broth averages for sample_id ' + si);
         #query physiological parameters for the sample_id
         pp = {};
         pp = self.get_physiologicalParameters_experimentIDAndSampleID_samplePhysiologicalParameters(experiment_id_I,si)
         #query sample_date
         sample_date = None;
         sample_date = self.get_sampleDate_experimentIDAndSampleID_sampleDescription(experiment_id_I,si);
         #query od600 values from biological broth replicates
         broth_od600 = [];
         broth_od600 = self.get_OD600s_experimentIDAndSampleID_samplePhysiologicalParameters(experiment_id_I,si);
         #update sample_physiologicalParameters
         pp['od600'] = numpy.mean(broth_od600);
         data.append(pp);  
     self.update_data_samplePhysiologicalParameters(data)
    def calculate_genesFpkmTrackingStats(self,
                experiment_id_I = None,
                sample_name_I = None,):
        """calculate statistics of replicate samples from genesFpkmTracking

        INPUT:
        OPTION INPUT:
        experiment_id_I = limiter for the experiment_id
        sample_name_I = limiter for the sample_name
        
        """
        
        data_O=[];
        stats_O=[];
        experiment_id = experiment_id_I;
        sn = sample_name_I;
        genesFpkmTracking = self.genesFpkmTracking;
        calculate = calculate_interface();
        # get the uniqueSampleNameAbbreviations
        sna_unique = self._get_uniqueSampleNameAbbreviations();
        for sna in sna_unique:
            data_tmp = [];
            data_tmp = self._get_rowsBySampleNameAbbreviation(sna);
            # calculate using scipy
            data_ave_O, data_var_O, data_lb_O, data_ub_O = None, None, None, None;
            data_ave_O, data_var_O, data_lb_O, data_ub_O = calculate.calculate_ave_var(data_tmp,confidence_I = 0.95);
            # calculate the interquartile range
            min_O, max_O, median_O, iq_1_O, iq_3_O = None, None, None, None, None;
            min_O, max_O, median_O, iq_1_O, iq_3_O = calculate.calculate_interquartiles(data_tmp);
            # record the statistics for the replicate group (the original
            # snippet ends here, so the fields below are assumed)
            stats_O.append({
                'experiment_id':experiment_id,
                'sample_name_abbreviation':sna,
                'mean':data_ave_O,'var':data_var_O,
                'ci_lb':data_lb_O,'ci_ub':data_ub_O,
                'min':min_O,'max':max_O,'median':median_O,
                'iq_1':iq_1_O,'iq_3':iq_3_O});
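
# A sketch of what the two calculate_interface calls presumably compute: the
# mean, sample variance, and a t-based confidence interval, plus the quartile
# summary. The implementation below is assumed, not taken from the source:
import numpy as np
from scipy import stats

def calculate_ave_var(data, confidence_I=0.95):
    data = np.asarray(data, dtype=float)
    ave, var, n = data.mean(), data.var(ddof=1), len(data)
    # half-width of the t-based confidence interval on the mean
    h = stats.t.ppf((1. + confidence_I)/2., n - 1)*np.sqrt(var/n)
    return ave, var, ave - h, ave + h

def calculate_interquartiles(data):
    data = np.asarray(data, dtype=float)
    return (data.min(), data.max(), np.median(data),
            np.percentile(data, 25), np.percentile(data, 75))
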
    def make_heatmap(self,mutations_I=[],sample_names_I=[], mutation_id_exclusion_list=[],max_position=4000000,
                row_pdist_metric_I='euclidean',row_linkage_method_I='complete',
                col_pdist_metric_I='euclidean',col_linkage_method_I='complete'):
        '''Execute hierarchical cluster on row and column data'''

        print('executing heatmap...');
        calculate = calculate_interface();

        # partition into variables:
        if mutations_I: mutation_data = mutations_I;
        else: mutation_data = self.mutations;
        if sample_names_I: sample_names = sample_names_I;
        else: sample_names = self.sample_names;
        mutation_data_O = [];
        mutation_ids_all = [];
        for end_cnt,mutation in enumerate(mutation_data):
            if int(mutation['mutation_position']) > max_position: #ignore positions greater than max_position
                continue;
            # mutation id
            mutation_id = '';
            mutation_id = self._make_mutationID(mutation['mutation_genes'],mutation['mutation_type'],int(mutation['mutation_position']))
            tmp = {};
            tmp.update(mutation);
            tmp.update({'mutation_id':mutation_id});
            mutation_data_O.append(tmp);
            mutation_ids_all.append(mutation_id);
        mutation_ids_all_unique = list(set(mutation_ids_all));
        mutation_ids = [x for x in mutation_ids_all_unique if x not in mutation_id_exclusion_list];
        # generate the frequency matrix data structure (sample x mutation)
        data_O = numpy.zeros((len(sample_names),len(mutation_ids)));
        samples=[];
        # order 2: groups each sample by mutation (intermediate x mutation)
        for sample_name_cnt,sample_name in enumerate(sample_names): #all samples for intermediate j / mutation i
            samples.append(sample_name); # corresponding label from hierarchical clustering
            for mutation_cnt,mutation in enumerate(mutation_ids): #all mutations i for intermediate j
                for row in mutation_data_O:
                    if row['mutation_id'] == mutation and row['sample_name'] == sample_name:
                        data_O[sample_name_cnt,mutation_cnt] = row['mutation_frequency'];
        # generate the clustering for the heatmap
        heatmap_O = [];
        dendrogram_col_O = {};
        dendrogram_row_O = {};
        heatmap_O,dendrogram_col_O,dendrogram_row_O = calculate.heatmap(data_O,samples,mutation_ids,
                row_pdist_metric_I=row_pdist_metric_I,row_linkage_method_I=row_linkage_method_I,
                col_pdist_metric_I=col_pdist_metric_I,col_linkage_method_I=col_linkage_method_I);
        # record the data
        self.heatmap = heatmap_O;
        self.dendrogram_col = dendrogram_col_O;
        self.dendrogram_row = dendrogram_row_O;
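
# calculate.heatmap is not shown; a sketch of the row/column clustering it
# presumably performs with scipy, using the metrics and linkage methods passed
# above (assumed implementation):
import numpy as np
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage, dendrogram

def cluster_heatmap(data, row_labels, col_labels,
        row_pdist_metric_I='euclidean', row_linkage_method_I='complete',
        col_pdist_metric_I='euclidean', col_linkage_method_I='complete'):
    row_Z = linkage(pdist(data, metric=row_pdist_metric_I), method=row_linkage_method_I)
    col_Z = linkage(pdist(data.T, metric=col_pdist_metric_I), method=col_linkage_method_I)
    row_dendro = dendrogram(row_Z, labels=row_labels, no_plot=True)
    col_dendro = dendrogram(col_Z, labels=col_labels, no_plot=True)
    # reorder the matrix by the dendrogram leaves for display
    heatmap = data[np.ix_(row_dendro['leaves'], col_dendro['leaves'])]
    return heatmap, col_dendro, row_dendro
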
    def execute_calculateGrowthRates(self,experiment_id_I,sample_name_short_I=[]):
        '''Calculate growth rates (hr-1) based on the sample time and measured OD600'''

        calc = calculate_interface();
        data_O = [];
        #query sample names
        print('executing calculating growth rates...')
        if sample_name_short_I:
            sample_name_short = sample_name_short_I;
        else:
            sample_name_short = self.get_sampleNameShort_experimentID(experiment_id_I,6)
        for sns in sample_name_short:
            print('calculating growth rates for sample_name_short ' + sns);
            #query met_ids
            met_ids = [];
            met_ids = self.get_metIDs_experimentIDAndSampleNameShort(experiment_id_I,6,sns);
            for met in met_ids:
                print('calculating growth rates for met_id ' + met);
                if met != 'biomass': continue;
                #query time and OD600 values
                time, OD600 = [], [];
                time, OD600 = self.get_sampleDateAndDataCorrected_experimentIDAndSampleNameShortAndMetIDAndDataUnits(experiment_id_I,6,sns,met,'OD600');
                if not OD600 or not time: continue;
                #convert time to hrs
                time_hrs = [];
                for t in time:
                    time_hrs.append(t.year*8765.81277 + t.month*730.484 + t.day*24. + t.hour + t.minute / 60. + t.second / 3600.); #convert the datetime to cumulative hours
                #calculate growth rate and r2
                slope, intercept, r2, p_value, std_err = calc.calculate_growthRate(time_hrs,OD600)
                #add rows to the database
                row = {'experiment_id':experiment_id_I,
                    'sample_name_short':sns,
                    'met_id':met,
                    'slope':slope,
                    'intercept':intercept,
                    'r2':r2,
                    'rate':slope,
                    'rate_units':'hr-1',
                    'p_value':p_value,
                    'std_err':std_err,
                    'used_':True,
                    'comment_':None,};
                data_O.append(row);
        self.add_rows_table('data_stage01_physiology_rates',data_O);
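
# calculate_growthRate is not shown; given that the OD600 is elsewhere
# reconstructed as exp(slope*t + intercept), it presumably fits ln(OD600)
# against time by least squares so that the slope is the growth rate in hr-1
# (assumed sketch):
import numpy as np
from scipy.stats import linregress

def calculate_growthRate(time_hrs, od600):
    slope, intercept, r, p_value, std_err = linregress(time_hrs, np.log(od600))
    return slope, intercept, r**2, p_value, std_err
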
    def calculate_fluxDifference(self,flux_1,flux_stdev_1,flux_lb_1,flux_ub_1,flux_units_1,
                flux_2,flux_stdev_2,flux_lb_2,flux_ub_2,flux_units_2,
                criteria_I = 'flux_lb/flux_ub'):
        """Calculate flux differences and determine if the differences are significant
        Input:
        flux_1 = data for flux 1 to be compared
        ...
        flux_2 = data for flux 2 to be compared
        ... 
        criteria_I = string, flux_lb/flux_ub: use flux_lb and flux_ub to determine significance (default)
                             flux_mean/flux_stdev: use the flux_mean and flux_stdev to determine significance

        Output:
        flux_diff = relative flux difference, float
        flux_distance = geometric difference (i.e., distance), float
        fold_change = geometric fold change
        significant = boolean
    
        """
        calc = calculate_interface();
        flux_diff = 0.0;
        flux_distance = 0.0;
        significant = False;
        fold_change = 0.0;
        if criteria_I == 'flux_lb/flux_ub':
            flux_mean_1 = np.mean([flux_lb_1,flux_ub_1]);
            flux_mean_2 = np.mean([flux_lb_2,flux_ub_2]);
            flux_diff = calc.calculate_difference(flux_mean_1,flux_mean_2,type_I='relative');
            flux_distance = calc.calculate_difference(flux_mean_1,flux_mean_2,type_I='geometric');
            fold_change = calc.calculate_foldChange(flux_mean_1,flux_mean_2,type_I='geometric');
            significant = self.determine_fluxDifferenceSignificance(flux_lb_1,flux_ub_1,flux_lb_2,flux_ub_2);
        elif criteria_I == 'flux_mean/flux_stdev':
            flux_diff = calc.calculate_difference(flux_1,flux_2,type_I='relative');
            flux_distance = calc.calculate_difference(flux_1,flux_2,type_I='geometric');
            fold_change = calc.calculate_foldChange(flux_1,flux_2,type_I='geometric');
            flux_lb_1 = flux_1 - flux_stdev_1;
            flux_lb_2 = flux_2 - flux_stdev_2;
            flux_ub_1 = flux_1 + flux_stdev_1;
            flux_ub_2 = flux_2 + flux_stdev_2;
            significant = self.determine_fluxDifferenceSignificance(flux_lb_1,flux_ub_1,flux_lb_2,flux_ub_2);
        else:
            print('criteria not recognized!');
        return flux_diff,flux_distance,fold_change,significant;
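
# determine_fluxDifferenceSignificance is not shown; a plausible sketch,
# assuming two fluxes are called significantly different when their [lb, ub]
# intervals do not overlap:
def determine_fluxDifferenceSignificance(flux_lb_1, flux_ub_1, flux_lb_2, flux_ub_2):
    # disjoint bound intervals => significant difference
    return flux_ub_1 < flux_lb_2 or flux_ub_2 < flux_lb_1
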
    def execute_updatePhysiologicalParametersFromOD600(self, experiment_id_I, sample_ids_I=[]):
        '''Calculate physiological parameters from the OD600 and volume sample'''
        calc = calculate_interface();
        data = [];
        #query sample_ids for the experiment that have an OD600, but do not have culture_density
        print('execute update physiological parameters from OD600...')
        if sample_ids_I:
            sample_ids = [];
            sample_ids = sample_ids_I;
        else:
            sample_ids = [];
            sample_ids = self.get_sampleIDs_experimentIDWithOD600NoCultureDensity_samplePhysiologicalParameters(experiment_id_I)
        for si in sample_ids:
            print('updating physiological parameters from OD600 for sample_id ' + si);
            #Query sample_description
            desc = {};
            desc = self.get_description_experimentIDAndSampleID_sampleDescription(experiment_id_I,si)
            if not desc['biological_material']: continue;
            #query physiological parameters for the sample_id
            pp = {};
            pp = self.get_physiologicalParameters_experimentIDAndSampleID_samplePhysiologicalParameters(experiment_id_I,si)
            #Query conversions (conversion_name: gDW2OD_lab and ODspecificCellConcentration_lab)
            conversion_gDW2OD = None;
            conversion_gDW2OD_units = None;
            conversion_gDW2OD, conversion_gDW2OD_units = self.get_conversionAndConversionUnits_biologicalMaterialAndConversionName(desc['biological_material'],'gDW2OD_lab');
            conversion_ODspecificCellConcentration = None;
            conversion_ODspecificCellConcentration_units = None;
            conversion_ODspecificCellConcentration, conversion_ODspecificCellConcentration_units = self.get_conversionAndConversionUnits_biologicalMaterialAndConversionName(desc['biological_material'],'ODspecificCellConcentration_lab');
            #Calculate the vcd, culture_density from the OD600 and conversions
            culture_density,culture_density_units = None,None;
            culture_density,culture_density_units = calc.calculate_cultureDensity_ODAndConversionAndConversionUnits(pp['od600'],conversion_gDW2OD, conversion_gDW2OD_units);
            vcd,vcd_units = None,None;
            vcd,vcd_units = calc.calculate_cultureDensity_ODAndConversionAndConversionUnits(pp['od600'],conversion_ODspecificCellConcentration, conversion_ODspecificCellConcentration_units);
            #Calculate the cells, dcw, wcw from the OD600, culture_volume_sampled and conversions

            #Update sample_physiologicalparameters
            pp['culture_density'],pp['culture_density_units'] = culture_density,culture_density_units;
            pp['vcd'],pp['vcd_units'] = vcd,vcd_units;
            data.append(pp);
        self.update_data_samplePhysiologicalParameters(data);
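
# calculate_cultureDensity_ODAndConversionAndConversionUnits presumably scales
# the OD600 by the conversion factor; a minimal sketch (units handling is
# simplified here and assumed):
def calculate_cultureDensity(od600, conversion, conversion_units):
    if od600 is None or conversion is None:
        return None, None
    # e.g., OD600 * gDW2OD_lab [gDW*L-1*OD600-1] -> culture density [gDW*L-1]
    return od600*conversion, conversion_units
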
    def execute_findShortestPath_nodes(self,model_id_I,
            nodes_startAndStop_I,
            algorithm_I='all_simple_paths',params_I={'cutoff':25},
            exclusion_list_I=[],
            weights_I=None
            ):
        '''
        INPUT:
        model_id_I: model id [string]
        nodes_startAndStop_I: list of node start/stops
            e.g., [[nad_c,nadh_c],[g6p_c,f6p_c],...]
        OUTPUT:
        shortest_path_O = [{'start':...,'stop':...,'all_paths':...,'algorithm':...,path statistics...},...]

        distance = (len(sp['shortest_path'])-1)/2
        '''
        calc = calculate_interface();
        shortest_path_O = [];
        # get the model reactions from table
        reactions = self.get_rows_modelID_dataStage02PhysiologyModelReactions(model_id_I);
        #convert rxns list to directed graph
        aCyclicGraph = self.convert_modelReactionsTable2DirectedAcyclicGraph(
            reactions,weights_I=weights_I,attributes_I={},
            exclusion_list_I=exclusion_list_I);
        # find the shortest paths
        for startAndStop in nodes_startAndStop_I:
            tmp = {'start':startAndStop[0],'stop':startAndStop[1]};
            try:
                output2 = self.find_shortestPath_nodes(
                    aCyclicGraph,startAndStop[0],startAndStop[1],
                    algorithm_I=algorithm_I,params_I=params_I);
                if hasattr(output2, '__next__'): # a generator of paths (e.g., from all_simple_paths)
                    paths = list(output2);
                else: # a single path
                    paths = [output2];
                distances = [(len(p)-1)/2 for p in paths]; # nodes alternate metabolite/reaction
            except Exception as e:
                print(e);
                print('algorithm = ' + algorithm_I + '; start = ' + startAndStop[0] + '; stop = ' + startAndStop[1]);
                continue;
            #calculate descriptive statistics on the paths
            try:
                data_ave_O, data_var_O, data_lb_O, data_ub_O = calc.calculate_ave_var(distances,confidence_I = 0.95);
                if data_ave_O:
                    data_cv_O = sqrt(data_var_O)/data_ave_O*100;
                else:
                    data_cv_O = None;
                min_O, max_O, median_O, iq_1_O, iq_3_O=calc.calculate_interquartiles(distances);
            except Exception as e:
                print(e);
                print('algorithm = ' + algorithm_I + '; start = ' + startAndStop[0] + '; stop = ' + startAndStop[1]);
                continue;
            tmp['all_paths'] = paths;
            tmp['algorithm'] = algorithm_I;
            tmp['params'] = params_I;
            tmp['path_max'] = max_O;
            tmp['path_min'] = min_O;
            tmp['path_iq_1'] = iq_1_O;
            tmp['path_iq_3'] = iq_3_O;
            tmp['path_median'] = median_O;
            tmp['path_average'] = data_ave_O;
            tmp['path_var'] = data_var_O;
            tmp['path_n'] = len(paths);
            tmp['path_cv'] = data_cv_O;
            tmp['path_ci_lb'] = data_lb_O;
            tmp['path_ci_ub'] = data_ub_O;
            tmp['path_ci_level'] = 0.95;
            shortest_path_O.append(tmp);
        return shortest_path_O;
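
# find_shortestPath_nodes is not shown; given the generator check above, it
# presumably delegates to networkx, e.g. (assumed wiring; only the default
# algorithm name and the 'cutoff' parameter come from the source):
import networkx as nx

def find_shortest_paths(graph, start, stop, algorithm_I='all_simple_paths', params_I=None):
    params_I = params_I or {}
    if algorithm_I == 'all_simple_paths':
        # returns a generator of node lists
        return nx.all_simple_paths(graph, start, stop, cutoff=params_I.get('cutoff'))
    elif algorithm_I == 'shortest_path':
        # returns a single node list
        return nx.shortest_path(graph, start, stop)
    raise ValueError('unsupported algorithm: ' + algorithm_I)
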
    def findAndCalculate_amplificationStats_fromGff(self,gff_file,
                strand_start, strand_stop,
                experiment_id_I = None,
                sample_name_I = None,
                scale_factor=True, downsample_factor=0,
                reads_min=1.5,reads_max=5.0,
                indices_min=200,consecutive_tol=10):
        """find amplifications from the gff file and calculate their statistics

        INPUT:
        strand_start = index of the start position
        strand_stop = index of the stop position
        scale_factor = boolean, if true, reads are normalized to a maximum of 100
        downsample_factor = integer, factor by which to downsample the points
        reads_min = minimum number of reads to identify an amplification
        reads_max = maximum number of reads to identify an amplification
        indices_min = minimum number of points in a high-coverage region
        consecutive_tol = maximum number of consecutive points that fail the coverage min/max criteria but can still be included in a high-coverage region

        OPTION INPUT:
        experiment_id_I = tag for the experiment from which the sample came
        sample_name_I = tag for the sample name
        
        """
        data_O=[];
        stats_O=[];
        experiment_id = experiment_id_I;
        sn = sample_name_I;
        calculate = calculate_interface();
        # get the data_dir
        self.set_gffFile(gff_file);
        # extract the strands (region-level downsampling is applied later, in the collapse below)
        self.extract_strandsFromGff(strand_start, strand_stop, scale=scale_factor, downsample=0)
        # find high coverage regions
        plus_high_region_indices,minus_high_region_indices = self.find_highCoverageRegions(coverage_min=reads_min,coverage_max=reads_max,points_min=indices_min,consecutive_tol=consecutive_tol);
        
        # record the means for later use
        plus_mean,minus_mean = self.plus.mean(),self.minus.mean();
        plus_min,minus_min = self.plus.min(),self.minus.min();
        plus_max,minus_max = self.plus.max(),self.minus.max();
        # calculate stats on the high coverage regions
        # + strand
        for row_cnt,row in enumerate(plus_high_region_indices):
            plus_region = self.plus_high_regions[(self.plus_high_regions.index>=row['start']) & (self.plus_high_regions.index<=row['stop'])]
            # calculate using scipy
            data_ave_O, data_var_O, data_lb_O, data_ub_O = None, None, None, None;
            data_ave_O, data_var_O, data_lb_O, data_ub_O = calculate.calculate_ave_var(plus_region.values,confidence_I = 0.95);
            # calculate the interquartile range
            min_O, max_O, median_O, iq_1_O, iq_3_O = None, None, None, None, None;
            min_O, max_O, median_O, iq_1_O, iq_3_O=calculate.calculate_interquartiles(plus_region.values);
            # record data
            stats_O.append({
                #'analysis_id':analysis_id,
                'experiment_id':experiment_id,
                'sample_name':sn,
                'genome_chromosome':1,
                'genome_strand':'plus',
                'strand_start':strand_start,
                'strand_stop':strand_stop,
                'reads_min':min_O,
                'reads_max':max_O,
                'reads_lb':data_lb_O,
                'reads_ub':data_ub_O,
                'reads_iq1':iq_1_O,
                'reads_iq3':iq_3_O,
                'reads_median':median_O,
                'reads_mean':data_ave_O,
                'reads_var':data_var_O,
                'reads_n':len(plus_region.values),
                'amplification_start':int(row['start']),
                'amplification_stop':int(row['stop']),
                'used_':True,
                'comment_':None
                })
            # downsample
            collapse_factor = None;
            if downsample_factor > 1:
                collapse_factor = int((row['stop'] - row['start']) / downsample_factor)
            if collapse_factor and collapse_factor > 1:
                plus_region = plus_region.groupby(lambda x: x // collapse_factor).mean()
                plus_region.index *= collapse_factor
            # add mean to index before and after the amplification start and stop, respectively (for visualization)
            if downsample_factor > 1 and row_cnt==0:
                #plus_region[strand_start]=plus_mean;
                #plus_region[strand_stop]=plus_mean;
                data_O.append({
                    #'analysis_id':analysis_id,
                    'experiment_id':experiment_id,
                    'sample_name':sn,
                    'genome_chromosome':1, #default
                    'genome_strand':'plus_mean',
                    #'genome_index':int(strand_start),
                    'genome_index':int(row['start']-1),
                    'strand_start':strand_start,
                    'strand_stop':strand_stop,
                    'reads':plus_mean,
                    'reads_min':reads_min,
                    'reads_max':reads_max,
                    'indices_min':indices_min,
                    'consecutive_tol':consecutive_tol,
                    'scale_factor':scale_factor,
                    'downsample_factor':downsample_factor,
                    'amplification_start':strand_start,
                    'amplification_stop':strand_stop,
                    'used_':True,
                    'comment_':'mean reads of the plus strand'
                    });
            if downsample_factor > 1 and row_cnt==len(plus_high_region_indices)-1:
                data_O.append({
                    #'analysis_id':analysis_id,
                    'experiment_id':experiment_id,
                    'sample_name':sn,
                    'genome_chromosome':1, #default
                    'genome_strand':'plus_mean',
                    #'genome_index':int(strand_stop),
                    'genome_index':int(row['stop']+1),
                    'strand_start':strand_start,
                    'strand_stop':strand_stop,
                    'reads':plus_mean,
                    'reads_min':reads_min,
                    'reads_max':reads_max,
                    'indices_min':indices_min,
                    'consecutive_tol':consecutive_tol,
                    'scale_factor':scale_factor,
                    'downsample_factor':downsample_factor,
                    'amplification_start':strand_start,
                    'amplification_stop':strand_stop,
                    'used_':True,
                    'comment_':'mean reads of the plus strand'
                    });
            ## add zeros to strand start and stop, respectively (for visualization)
            #if downsample_factor > 1:
            #    plus_region[row['start']-1]=plus_mean;
            #    plus_region[row['stop']+1]=plus_mean;
            # record high coverage regions
            for index,reads in plus_region.items():
                data_O.append({
                    #'analysis_id':analysis_id,
                    'experiment_id':experiment_id,
                    'sample_name':sn,
                    'genome_chromosome':1, #default
                    'genome_strand':'plus',
                    'genome_index':int(index),
                    'strand_start':strand_start,
                    'strand_stop':strand_stop,
                    'reads':float(reads),
                    'reads_min':reads_min,
                    'reads_max':reads_max,
                    'indices_min':indices_min,
                    'consecutive_tol':consecutive_tol,
                    'scale_factor':scale_factor,
                    'downsample_factor':downsample_factor,
                    'amplification_start':int(row['start']),
                    'amplification_stop':int(row['stop']),
                    'used_':True,
                    'comment_':None
                });
        # - strand
        for row_cnt,row in enumerate(minus_high_region_indices):
            minus_region = self.minus_high_regions[(self.minus_high_regions.index>=row['start']) & (self.minus_high_regions.index<=row['stop'])]
            # calculate using scipy
            data_ave_O, data_var_O, data_lb_O, data_ub_O = None, None, None, None;
            data_ave_O, data_var_O, data_lb_O, data_ub_O = calculate.calculate_ave_var(minus_region.values,confidence_I = 0.95);
            # calculate the interquartile range
            min_O, max_O, median_O, iq_1_O, iq_3_O = None, None, None, None, None;
            min_O, max_O, median_O, iq_1_O, iq_3_O=calculate.calculate_interquartiles(minus_region.values);
            # record data
            stats_O.append({
                #'analysis_id':analysis_id,
                'experiment_id':experiment_id,
                'sample_name':sn,
                'genome_chromosome':1,
                'genome_strand':'minus',
                'strand_start':strand_start,
                'strand_stop':strand_stop,
                'reads_min':min_O,
                'reads_max':max_O,
                'reads_lb':data_lb_O,
                'reads_ub':data_ub_O,
                'reads_iq1':iq_1_O,
                'reads_iq3':iq_3_O,
                'reads_median':median_O,
                'reads_mean':data_ave_O,
                'reads_var':data_var_O,
                'reads_n':len(minus_region.values),
                'amplification_start':int(row['start']),
                'amplification_stop':int(row['stop']),
                'used_':True,
                'comment_':None
                })
            # downsample
            collapse_factor = None;
            if downsample_factor > 1:
                collapse_factor = int((row['stop'] - row['start']) / downsample_factor)
            if collapse_factor and collapse_factor > 1:
                minus_region = minus_region.groupby(lambda x: x // collapse_factor).mean()
                minus_region.index *= collapse_factor
            # add mean to index before and after the amplification start and stop, respectively (for visualization)
            if downsample_factor > 1 and row_cnt==0:
                #minus_region[strand_start]=minus_mean;
                #minus_region[strand_stop]=minus_mean;
                data_O.append({
                    #'analysis_id':analysis_id,
                    'experiment_id':experiment_id,
                    'sample_name':sn,
                    'genome_chromosome':1, #default
                    'genome_strand':'minus_mean',
                    #'genome_index':int(strand_start),
                    'genome_index':int(row['start']-1),
                    'strand_start':strand_start,
                    'strand_stop':strand_stop,
                    'reads':minus_mean,
                    'reads_min':reads_min,
                    'reads_max':reads_max,
                    'indices_min':indices_min,
                    'consecutive_tol':consecutive_tol,
                    'scale_factor':scale_factor,
                    'downsample_factor':downsample_factor,
                    'amplification_start':strand_start,
                    'amplification_stop':strand_stop,
                    'used_':True,
                    'comment_':'mean reads of the minus strand'
                    });
            if downsample_factor > 1 and row_cnt==len(minus_high_region_indices)-1:
                data_O.append({
                    #'analysis_id':analysis_id,
                    'experiment_id':experiment_id,
                    'sample_name':sn,
                    'genome_chromosome':1, #default
                    'genome_strand':'minus_mean',
                    #'genome_index':int(strand_stop),
                    'genome_index':int(row['stop']+1),
                    'strand_start':strand_start,
                    'strand_stop':strand_stop,
                    'reads':minus_mean,
                    'reads_min':reads_min,
                    'reads_max':reads_max,
                    'indices_min':indices_min,
                    'consecutive_tol':consecutive_tol,
                    'scale_factor':scale_factor,
                    'downsample_factor':downsample_factor,
                    'amplification_start':strand_start,
                    'amplification_stop':strand_stop,
                    'used_':True,
                    'comment_':'mean reads of the minus strand'
                    });
            ## add zeros to strand start and stop, respectively (for visualization)
            #if downsample_factor > 1:
            #    minus_region[row['start']-1]=minus_mean;
            #    minus_region[row['stop']+1]=minus_mean;
            # record high coverage regions
            for index,reads in minus_region.items():
                data_O.append({
                    #'analysis_id':analysis_id,
                    'experiment_id':experiment_id,
                    'sample_name':sn,
                    'genome_chromosome':1, #default
                    'genome_strand':'minus',
                    'genome_index':int(index),
                    'strand_start':strand_start,
                    'strand_stop':strand_stop,
                    'reads':float(reads),
                    'reads_min':reads_min,
                    'reads_max':reads_max,
                    'indices_min':indices_min,
                    'consecutive_tol':consecutive_tol,
                    'scale_factor':scale_factor,
                    'downsample_factor':downsample_factor,
                    'amplification_start':int(row['start']),
                    'amplification_stop':int(row['stop']),
                    'used_':True,
                    'comment_':None});
        #record the data
        self.amplifications = data_O;
        self.amplificationStats = stats_O;
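
# The per-region downsampling above collapses a pandas Series by integer
# division of its index and then restores approximate genome coordinates; a
# self-contained sketch of that step:
import pandas as pd

region = pd.Series(range(100), dtype=float)  # stand-in for a high-coverage region
collapse_factor = 10
collapsed = region.groupby(lambda x: x // collapse_factor).mean()
collapsed.index *= collapse_factor  # restore approximate genome indices
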
    def execute_calculateMissingComponents_replicates(self,experiment_id_I,biological_material_I=None,conversion_name_I=None,sample_names_short_I=[]):
        '''calculate estimates for samples in which a component was not found for any of the replicates'''
        
        calc = calculate_interface();

        print('execute_calculateMissingComponents_replicates...')
        data_O=[];
        # get all sample names short
        if sample_names_short_I:
            sample_names_short = sample_names_short_I;
        else:
            sample_names_short = self.get_sampleNameShort_experimentIDAndSampleDescription_dataStage01Normalized(experiment_id_I,'Broth');
        # get component names
        component_names = []
        component_names = self.get_componentNames_experimentID_dataStage01ReplicatesMI(experiment_id_I);
        # get time points
        time_points = [];
        time_points = self.get_timePoint_experimentID_dataStage01ReplicatesMI(experiment_id_I);
        for tp in time_points:
            print('calculating missing components for time_point ' + tp);
            for cn in component_names:
                print('calculating missing components for component_name ' + cn);
                component_group_name = None;
                calculated_concentration_units = None;
                component_group_name, calculated_concentration_units = self.get_componentGroupNameAndConcUnits_experimentIDAndComponentName_dataStage01Replicates(experiment_id_I,cn);
                for sns in sample_names_short:
                    print('calculating missing components for sample_name_short ' + sns);
                    # get calculated concentration
                    calculated_concentration = None;
                    calculated_concentration = self.get_calculatedConcentration_experimentIDAndSampleNameShortAndTimePointAndComponentName_dataStage01ReplicatesMI(experiment_id_I,sns,tp,cn);
                    if calculated_concentration: continue
                    # get the lloq
                    lloq = None;
                    conc_units = None;
                    lloq, conc_units = self.get_lloq_ExperimentIDAndComponentName_dataStage01LLOQAndULOQ(experiment_id_I,cn);
                    if not lloq:
                        print('lloq not found'); 
                        continue
                    # normalize the lloq
                    if (biological_material_I and conversion_name_I):
                        # get physiological parameters
                        cvs = None;
                        cvs_units = None;
                        od600 = None;
                        dil = None;
                        dil_units = None;
                        conversion = None;
                        conversion_units = None;
                        cvs, cvs_units, od600, dil,dil_units = self.get_CVSAndCVSUnitsAndODAndDilAndDilUnits_sampleNameShort(experiment_id_I,sns);
                        conversion, conversion_units = self.get_conversionAndConversionUnits_biologicalMaterialAndConversionName(biological_material_I,conversion_name_I);
                        if not(cvs and cvs_units and od600 and dil and dil_units):
                            print('cvs, cvs_units, or od600 are missing from the physiological parameters');
                            print('or dil and dil_units are missing from the sample description');
                            exit(-1);
                        elif not(conversion and conversion_units):
                            print('biological_material or conversion name is incorrect');
                            exit(-1);  
                        else:
                            #calculate the cell volume
                            cell_volume, cell_volume_units = self.calculate.calculate_biomass_CVSAndCVSUnitsAndODAndConversionAndConversionUnits(cvs,cvs_units,od600,conversion,conversion_units);
                            # calculate the normalized concentration
                            norm_conc = None;
                            norm_conc_units = None;
                            norm_conc, norm_conc_units = self.calculate.calculate_conc_concAndConcUnitsAndDilAndDilUnitsAndConversionAndConversionUnits(lloq,conc_units,dil,dil_units,cell_volume, cell_volume_units);
                            if norm_conc:
                                norm_conc = norm_conc/2;
                                # update data_stage01_quantification_normalized
                            #    dataListUpdated_I.append({'experiment_id':experiment_id_I,
                            #        'sample_name_short':sns,
                            #        'time_point':tp,
                            #        'component_group_name':component_group_name,
                            #        'component_name':cn,
                            #        'calculated_concentration':norm_conc,
                            #        'calculated_concentration_units':norm_conc_units,
                            #        'used_':True,
                            #        'comment_':None});
                                # populate data_stage01_quantification_replicatesMI
                                row = data_stage01_quantification_replicatesMI(experiment_id_I,sns,tp,component_group_name,cn,norm_conc,"lloq",None,norm_conc_units,True,None);
                                self.session.add(row);
                    else:
                        calc_conc = lloq/2;
                        # populate data_stage01_quantification_replicatesMI
                        #dataListUpdated_I.append({'experiment_id':experiment_id_I,
                        #        'sample_name_short':sns,
                        #        'time_point':tp,
                        #        'component_group_name':component_group_name,
                        #        'component_name':cn,
                        #        'calculated_concentration':calc_conc,
                        #        'calculated_concentration_units':conc_units,
                        #        'used_':True,
                        #        'comment_':None});
                        row = data_stage01_quantification_replicatesMI(experiment_id_I,sns,tp,component_group_name,cn,"lloq",None,calc_conc,conc_units,True);
                        self.session.add(row);
        #self.update_dataStage01ReplicatesMI(dataListUpdated_I);
        self.session.commit();
        #self.add_rows_table('data_stage01_quantification_replicatesMI',data_O)
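
# The imputation rule above replaces a concentration that was not detected in
# any replicate with half of the lower limit of quantification; a minimal
# sketch of the unnormalized branch (the biomass-normalized branch additionally
# converts the lloq through the dilution and cell-volume terms shown above):
def impute_half_lloq(lloq):
    """Return lloq/2 as the imputed concentration, or None if no lloq exists."""
    return lloq/2. if lloq else None
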
    def execute_analyzePeakResolution(self,experiment_id_I,sample_names_I=[],sample_types_I=['Standard'],component_name_pairs_I=[],
                            acquisition_date_and_time_I=[None,None]):
        '''Analyze resolution for critical pairs
        Input:
        experiment_id_I
        sample_names_I
        sample_types_I
        component_name_pairs_I = [[component_name_1,component_name_2],...]
        acquisition_date_and_time_I = ['%m/%d/%Y %H:%M','%m/%d/%Y %H:%M']
        '''

        print('execute_analyzePeakResolution...')
        #convert string date time to datetime
        # e.g. time.strptime('4/15/2014 15:51','%m/%d/%Y %H:%M')
        acquisition_date_and_time = [];
        if acquisition_date_and_time_I and acquisition_date_and_time_I[0] and acquisition_date_and_time_I[1]:
            for dateandtime in acquisition_date_and_time_I:
                time_struct = strptime(dateandtime,'%m/%d/%Y %H:%M')
                dt = datetime.fromtimestamp(mktime(time_struct))
                acquisition_date_and_time.append(dt);
        else: acquisition_date_and_time=[None,None]
        data_O = [];
        component_names_pairs_all = [];
        # get sample names
        if sample_names_I and sample_types_I and len(sample_types_I)==1:
            sample_names = sample_names_I;
            sample_types = [sample_types_I[0] for sn in sample_names];
        else:
            sample_names = [];
            sample_types = [];
            for st in sample_types_I:
                sample_names_tmp = [];
                sample_names_tmp = self.get_sampleNames_experimentIDAndSampleType(experiment_id_I,st);
                sample_names.extend(sample_names_tmp);
                sample_types_tmp = [];
                sample_types_tmp = [st for sn in sample_names_tmp];
                sample_types.extend(sample_types_tmp);
        for sn in sample_names:
            print('analyzing peakInformation for sample_name ' + sn);
            for component_name_pair in component_name_pairs_I:
                # get critical pair data
                cpd1 = {};
                cpd2 = {};
                cpd1 = self.get_peakInfo_sampleNameAndComponentName(sn,component_name_pair[0],acquisition_date_and_time);
                cpd2 = self.get_peakInfo_sampleNameAndComponentName(sn,component_name_pair[1],acquisition_date_and_time);
                if cpd1 and cpd2 and cpd1['retention_time'] and cpd2['retention_time']:
                    # calculate the RT difference and resolution
                    rt_dif = abs(cpd1['retention_time']-cpd2['retention_time']);
                    resolution = rt_dif/(0.5*(cpd1['width_at_50']+cpd2['width_at_50']));
                    # record data
                    data_O.append({'component_name_pair':component_name_pair,
                                   'rt_dif':rt_dif,
                                   'resolution':resolution,
                                   'component_group_name_pair':[cpd1['component_group_name'],cpd2['component_group_name']],
                                   'sample_name':sn,
                                   'acquisition_date_and_time':cpd1['acquisition_date_and_time']});
        #TODO:
        # 1. make a calculation method
        # calculate statistics for specific parameters
        data_add = [];
        calc = calculate_interface();
        for cnp in component_name_pairs_I:
            data_parameters = {};
            data_parameters_stats = {};
            for parameter in ['rt_dif','resolution']:
                data_parameters[parameter] = [];
                data_parameters_stats[parameter] = {'ave':None,'var':None,'cv':None,'lb':None,'ub':None};
                acquisition_date_and_times = [];
                sample_names_parameter = [];
                sample_types_parameter = [];
                component_group_name_pair = None;
                for sn_cnt,sn in enumerate(sample_names):
                    for d in data_O:
                        if d['sample_name'] == sn and d['component_name_pair'] == cnp and d[parameter]:
                            data_parameters[parameter].append(d[parameter]);
                            acquisition_date_and_times.append(d['acquisition_date_and_time'])
                            sample_names_parameter.append(sn);
                            sample_types_parameter.append(sample_types[sn_cnt])
                            component_group_name_pair = d['component_group_name_pair'];
                ave,var,lb,ub = None,None,None,None;
                if len(data_parameters[parameter])>1:
                    ave,var,lb,ub = calc.calculate_ave_var(data_parameters[parameter]);
                if ave:
                    cv = sqrt(var)/ave*100;
                    data_parameters_stats[parameter] = {'ave':ave,'var':var,'cv':cv,'lb':lb,'ub':ub};
                    # add data to the database:
                    row = {'experiment_id':experiment_id_I,
                        'component_group_name_pair':component_group_name_pair,
                        'component_name_pair':cnp,
                        'peakInfo_parameter':parameter,
                        'peakInfo_ave':data_parameters_stats[parameter]['ave'],
                        'peakInfo_cv':data_parameters_stats[parameter]['cv'],
                        'peakInfo_lb':data_parameters_stats[parameter]['lb'],
                        'peakInfo_ub':data_parameters_stats[parameter]['ub'],
                        'peakInfo_units':None,
                        'sample_names':sample_names_parameter,
                        'sample_types':sample_types_parameter,
                        'acqusition_date_and_times':acquisition_date_and_times,
                        'peakInfo_data':data_parameters[parameter],
                        'used_':True,
                        'comment_':None,};
                    data_add.append(row);
        self.add_rows_table('data_stage01_quantification_peakResolution',data_add);
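
# The resolution computed above follows the formula implemented in the loop:
# Rs = |rt1 - rt2| / (0.5*(w1 + w2)), with w1, w2 the peak widths at 50% height.
# A minimal sketch:
def peak_resolution(rt_1, width_at_50_1, rt_2, width_at_50_2):
    return abs(rt_1 - rt_2)/(0.5*(width_at_50_1 + width_at_50_2))
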
 def execute_physiologicalRatios_replicatesMI(self,experiment_id_I):
     '''Calculate physiologicalRatios from replicates MI'''
     calc = calculate_interface();
     print('execute_physiologicalRatios_replicatesMI...')
     # get sample names short
     sample_names_short = [];
     sample_names_short = self.get_SampleNameShort_experimentID_dataStage01ReplicatesMI(experiment_id_I);
     data_O = [];
     ratios_calc_O = [];
     for sns in sample_names_short:
         print('calculating physiologicalRatios from replicates for sample_names_short ' + sns);
         # get time points
         time_points = [];
         time_points = self.get_timePoint_experimentIDAndSampleNameShort_dataStage01ReplicatesMI(experiment_id_I,sns);
         for tp in time_points:
             print('calculating physiologicalRatios from replicates for time_point ' + tp);
             for k,v in self.ratios.items():
                 print('calculating physiologicalRatios from replicates for ratio ' + k);
                 ratios_data={};
                 calcratios=True;
                 for cgn in v['component_group_name']:
                     ratios_data[cgn] = None;
                     # concentrations and units
                     conc = None;
                     conc_unit = None;
                     conc, conc_unit = self.get_concAndConcUnits_experimentIDAndSampleNameShortAndTimePointAndComponentGroupName_dataStage01ReplicatesMI(experiment_id_I,sns,tp,cgn);
                     if not(conc): 
                         calcratios=False;
                         break;
                     ratios_data[cgn]=conc;
                 # calculate the physiologicalratios
                 if not calcratios: continue
                 ratio_calc,num_calc,den_calc = self.calculate_physiologicalRatios(k,ratios_data);
                 # add data to the session
                 row = {"experiment_id":experiment_id_I,
                     "sample_name_short":sns,
                     "time_point":tp,
                     "physiologicalratio_id":k,
                     "physiologicalratio_name":v['name'],
                     "physiologicalratio_value":ratio_calc,
                     "physiologicalratio_description":v['description'],
                     "used_":True,
                     "comment_":None}   
                 data_O.append(row);
                 row = {"experiment_id":experiment_id_I,
                     "sample_name_short":sns,
                     "time_point":tp,
                     "physiologicalratio_id":k+'_numerator',
                     "physiologicalratio_name":v['name']+'_numerator',
                     "physiologicalratio_value":num_calc,
                     "physiologicalratio_description":v['description'].split('/')[0],
                     "used_":True,
                     "comment_":None}   
                 data_O.append(row);
                 row = {"experiment_id":experiment_id_I,
                     "sample_name_short":sns,
                     "time_point":tp,
                     "physiologicalratio_id":k+'_denominator',
                     "physiologicalratio_name":v['name']+'_denominator',
                     "physiologicalratio_value":den_calc,
                     "physiologicalratio_description":v['description'].split('/')[1],
                     "used_":True,
                     "comment_":None}
                 data_O.append(row);
                     
     self.add_rows_table('data_stage01_quantification_physiologicalRatios_replicates',data_O);
    def execute_calculateGeoAverages_replicates(
        self,
        experiment_id_I,
        sample_name_abbreviations_I=[],
        time_points_I=[],
        calculated_concentration_units_I=[],
        ):
        '''Calculate the averages from replicates MI in ln space'''

        calc = calculate_interface();

        print('execute_calculateGeoAverages_replicates...')
        data_O = [];
        # get unique calculated_concentration_units/sample_name_abbreviations/component_names/component_group_names/time_points
        unique_rows = [];
        unique_rows = self.get_sampleNameAbbreviationsAndCalculatedConcentrationUnitsAndTimePointsAndComponentNames_experimentID_dataStage01QuantificationReplicatesMI(
            experiment_id_I,
            sample_name_abbreviations_I,
            time_points_I,
            calculated_concentration_units_I,
            exp_type_I=4)
        for unique_row in unique_rows:
            # get sample names short
            sample_names_short = [];
            sample_names_short = self.get_sampleNameShort_experimentIDAndSampleNameAbbreviationAndComponentNameAndTimePointAndCalculatedConcentrationUnits_dataStage01ReplicatesMI(
                experiment_id_I,
                unique_row['sample_name_abbreviation'],
                unique_row['component_name'],
                unique_row['time_point'],
                unique_row['calculated_concentration_units']
                );
            concs = [];
            conc_units = None;
            for sns in sample_names_short:
                # concentrations and units
                conc = None;
                conc = self.get_calculatedConcentration_experimentIDAndSampleNameShortAndTimePointAndComponentNameAndCalculatedConcentrationUnits_dataStage01ReplicatesMI(
                    experiment_id_I,
                    sns,
                    unique_row['time_point'],
                    unique_row['component_name'],
                    unique_row['calculated_concentration_units']
                    );
                if (not(conc) or conc==0): continue;
                # calculate the ln of the concentration
                # and convert to M from mM or uM
                if (unique_row['calculated_concentration_units'] == 'mM'): 
                    conc_units = 'M'; 
                    conc = conc*1e-3;
                elif (unique_row['calculated_concentration_units'] == 'uM'):
                    conc_units = 'M'; 
                    conc = conc*1e-6;
                elif (unique_row['calculated_concentration_units'] == 'umol*gDW-1'):
                    conc_units = 'mol*gDW-1';
                    conc = conc*1e-6;
                elif (unique_row['calculated_concentration_units'] == 'height_ratio' \
                    or unique_row['calculated_concentration_units'] == 'area_ratio'):
                    continue;
                else:
                    print('units of ' + str(unique_row['calculated_concentration_units']) + ' are not supported')
                    exit(-1);
                concs.append(conc);
            n_replicates = len(concs);
            conc_average = 0.0;
            conc_var = 0.0;
            conc_lb = 0.0;
            conc_ub = 0.0;
            # calculate average and CV of concentrations (requires at least 2 replicates)
            if n_replicates < 2: 
                continue
            else: 
                conc_average, conc_var, conc_lb, conc_ub = calc.calculate_ave_var_geometric(concs);

            # add data to the session
            row = {"experiment_id":experiment_id_I, 
                "sample_name_abbreviation":unique_row['sample_name_abbreviation'], 
                "time_point":unique_row['time_point'], 
                "component_group_name":unique_row['component_group_name'], 
                "component_name":unique_row['component_name'],
                "n_replicates":n_replicates, 
                "calculated_concentration_average":conc_average, 
                "calculated_concentration_var":conc_var, 
                "calculated_concentration_lb":conc_lb, 
                "calculated_concentration_ub":conc_ub, 
                "calculated_concentration_units":conc_units, 
                "used_":True
                };   
            data_O.append(row);
        self.add_rows_table('data_stage01_quantification_averagesmigeo',data_O)
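
# calculate_ave_var_geometric is not shown; since the docstring says the
# averages are computed in ln space, it presumably takes the statistics of the
# log-concentrations and exponentiates the mean and bounds back (assumed
# sketch; the variance is left in ln space here):
import numpy as np
from scipy import stats

def calculate_ave_var_geometric(concs, confidence_I=0.95):
    logs = np.log(np.asarray(concs, dtype=float))
    ave, var, n = logs.mean(), logs.var(ddof=1), len(logs)
    h = stats.t.ppf((1. + confidence_I)/2., n - 1)*np.sqrt(var/n)
    return np.exp(ave), var, np.exp(ave - h), np.exp(ave + h)
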
 def execute_calculateUptakeAndSecretionRates(self,experiment_id_I,sample_name_short_I=[],QC_filename_O=None):
     '''Calculate uptake and secretion rates (mmol*gDCW-1*hr-1) based on the sample time,
     measured gDCW (calculated from the OD600),
     and calculated growth rate (hr-1)'''
     
     calc = calculate_interface();
     data_O = [];
     #query sample names
     print('execute calculate uptake and secretion rates...')
      if sample_name_short_I:
          sample_name_short = sample_name_short_I;
      else:
          sample_name_short = self.get_sampleNameShort_experimentID(experiment_id_I,7)
     for sns in sample_name_short:
         print('calculating uptake and secretion rates for sample_name_short ' +sns);
         #query met_ids
         met_ids = [];
         met_ids = self.get_metIDs_experimentIDAndSampleNameShort(experiment_id_I,7,sns);
         for met in met_ids:
             print('calculating uptake and secretion rates for met_id ' + met);
             if met == 'biomass': continue; #ignore biomass (calculated previously)
             #query time,conc (mM), and sample_ids
             time, conc, sample_ids = [], [], []; #sorted by sample_date
             time, conc, sample_ids = self.get_sampleDateAndDataCorrectedAndSampleIDs_experimentIDAndSampleNameShortAndMetIDAndDataUnits(experiment_id_I,7,sns,met,'mM');
             if not conc or not time: continue;
             #query slope, intercept, and rate for the growth rate
             slope, intercept, r2, gr_rate, rate_units, p_value, std_err = None,None,None,None,None,None,None;
             slope, intercept, r2, gr_rate, rate_units, p_value, std_err = self.get_rateData_experimentIDAndSampleNameShortAndMetID_dataStage01PhysiologyRates(experiment_id_I,sns,'biomass');
             #query OD600 and DCW from sample_physiologicalparameters
             OD600, culture_density = [],[]; #sorted by sample_date
             for si in sample_ids:
                 OD600_tmp, culture_density_tmp = None,None;
                 OD600_tmp, culture_density_tmp = self.get_OD600AndCultureDensity_experimentIDAndSampleID_samplePhysiologicalParameters(experiment_id_I,7,si);
                 OD600.append(OD600_tmp);
                 culture_density.append(culture_density_tmp);
             #check that the length of DCW and conc match
             if len(conc)!=len(culture_density):
                 print('The length of measured concentrations and measured dcw do not match!')
             #convert time to hrs
             time_hrs = [];
             for t in time:
                  time_hrs.append(t.year*8765.81277 + t.month*730.484 + t.day*24. + t.hour + t.minute / 60. + t.second / 3600.); #convert the datetime to cumulative hours
              #calculate the uptake/secretion rate from the concentration vs. culture density
              slope, intercept, r2, rate, p_value, std_err = None,None,None,None,None,None;
              slope, intercept, r2, p_value, std_err, rate = calc.calculate_uptakeAndSecretionRate(culture_density,conc,gr_rate)
             #record time, conc, and culture density for QC
             for si_cnt,si in enumerate(sample_ids):
                 tmp={};
                 tmp['sample_name_short']=sns;
                 tmp['met_id']=met;
                 tmp['sample_id']=si;
                 tmp['time [hr]']=time_hrs[si_cnt];
                 tmp['OD600']=OD600[si_cnt];
                 tmp['culture_density [gDW*L-1]']=culture_density[si_cnt];
                 tmp['concentration [mM]']=conc[si_cnt];
                 tmp['growth_rate [hr-1]']=gr_rate;
                 data_O.append(tmp);
              #add rows to the database
              row = data_stage01_physiology_rates(experiment_id_I, sns, met,
                  slope, intercept, r2, rate, 'mmol*gDCW-1*hr-1',
                  p_value, std_err,
                  True, None);
             self.session.add(row);
     self.session.commit();
     if QC_filename_O:
         io = base_exportData(data_O);
         io.write_dict2csv(QC_filename_O,['sample_name_short','met_id','sample_id',
                                          'time [hr]','OD600','culture_density [gDW*L-1]',
                                          'concentration [mM]','growth_rate [hr-1]']);
    def export_dataStage01NormalizedAndAverages_js(self,
                analysis_id_I,
                sample_name_abbreviations_I=[],
                sample_names_I=[],
                component_names_I=[],
                cv_threshold_I=40,
                extracellular_threshold_I=80,
                data_dir_I='tmp'):
        '''export data_stage01_quantification_normalized and averages for visualization with ddt'''

        calc = calculate_interface();
        
        print('export_dataStage01NormalizedAndAverages_js...')
        data_norm_broth = [];
        data_norm_filtrate = [];
        data_norm_combined = [];
        data_ave = [];

        #SPLIT 1:
        #1 query unique calculated_concentration_units/sample_name_abbreviations/component_names/component_group_names/time_points/sample_names/sample_ids/sample_description
        uniqueRows_all = self.getQueryResult_groupNormalizedAveragesSamples_analysisID_dataStage01QuantificationNormalizedAndAverages(
                analysis_id_I
            );
        #2 filter in broth samples
        uniqueRows = self.filter_groupNormalizedAveragesSamples_experimentID_dataStage01QuantificationNormalizedAndAverages_limsSampleAndSampleID(
                uniqueRows_all,
                calculated_concentration_units_I=[],
                component_names_I=component_names_I,
                component_group_names_I=[],
                sample_names_I=sample_names_I,
                sample_name_abbreviations_I=sample_name_abbreviations_I,
                time_points_I=[],
            );
        if isinstance(uniqueRows, listDict):
            uniqueRows.convert_dataFrame2ListDict()
            uniqueRows = uniqueRows.get_listDict();
        replicates_tmp = {};#reorganize the data into a dictionary for quick traversal of the replicates
        for uniqueRow_cnt,uniqueRow in enumerate(uniqueRows):
            unique = (
                      uniqueRow['sample_name_abbreviation'],
                      uniqueRow['experiment_id'],
                      uniqueRow['time_point'],
                      uniqueRow['component_name'],
                      uniqueRow['calculated_concentration_units'])
            if unique not in replicates_tmp:
                replicates_tmp[unique] = [];
            replicates_tmp[unique].append(uniqueRow);
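        ## NOTE (illustrative sketch): the grouping above could also be written with
        ## collections.defaultdict, which removes the key-existence check:
        ##   from collections import defaultdict
        ##   replicates_tmp = defaultdict(list);
        ##   for uniqueRow in uniqueRows:
        ##       replicates_tmp[unique].append(uniqueRow); #unique = the 5-tuple built above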
        for unique,replicates in replicates_tmp.items():
            #get data from averages once per sample_name_abbreviation/component_name
            #print('exporting sample_name_abbreviation ' + replicates[0]['sample_name_abbreviation'] + " and component_name " + replicates[0]['component_name']);
            # get the averages and %CV samples
            row_ave = {};
            row_ave = self.get_row_experimentIDAndSampleNameAbbreviationAndTimePointAndComponentNameAndCalculatedConcentrationCVAndExtracellularPercent_dataStage01Averages(
                    replicates[0]['experiment_id'],
                    replicates[0]['sample_name_abbreviation'],
                    replicates[0]['time_point'],
                    replicates[0]['component_name'],
                    cv_threshold_I=cv_threshold_I,
                    extracellular_threshold_I=extracellular_threshold_I);
            if row_ave:
                stdev = calc.convert_cv2StDev(row_ave['calculated_concentration_filtrate_average'],row_ave['calculated_concentration_filtrate_cv']);
                row_ave['calculated_concentration_filtrate_lb'] = row_ave['calculated_concentration_filtrate_average']-stdev;
                row_ave['calculated_concentration_filtrate_ub'] = row_ave['calculated_concentration_filtrate_average']+stdev;
                stdev = calc.convert_cv2StDev(row_ave['calculated_concentration_broth_average'],row_ave['calculated_concentration_broth_cv']);
                row_ave['calculated_concentration_broth_lb'] = row_ave['calculated_concentration_broth_average']-stdev;
                row_ave['calculated_concentration_broth_ub'] = row_ave['calculated_concentration_broth_average']+stdev;
                stdev = calc.convert_cv2StDev(row_ave['calculated_concentration_average'],row_ave['calculated_concentration_cv']);
                row_ave['calculated_concentration_lb'] = row_ave['calculated_concentration_average']-stdev;
                row_ave['calculated_concentration_ub'] = row_ave['calculated_concentration_average']+stdev;
                row_ave['analysis_id'] = analysis_id_I;
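                ## NOTE (illustrative sketch): convert_cv2StDev presumably inverts the
                ## definition cv = stdev/mean*100, i.e.
                ##   def convert_cv2StDev(mean,cv):
                ##       return abs(mean)*cv/100.;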

                # get data from normalized
                filtrate_conc = [];
                broth_conc = [];
                for rep in replicates:
                    row = {};
                    row['analysis_id'] = analysis_id_I;
                    row['extracellular_percent'] = row_ave['extracellular_percent']
                    row['calculated_concentration_cv'] = row_ave['calculated_concentration_cv']
                    row.update(rep)
                    if rep['sample_desc'] == 'Filtrate':
                        data_norm_filtrate.append(row);
                        filtrate_conc.append(rep['calculated_concentration'])
                    if rep['sample_desc'] == 'Broth':
                        data_norm_broth.append(row);
                        broth_conc.append(rep['calculated_concentration'])
                    data_norm_combined.append(row);

                #add the min/max aggregates to the averages row
                if not broth_conc: broth_conc = [0];
                if not filtrate_conc: filtrate_conc = [0];
                row_ave['calculated_concentration_min']=min(broth_conc+filtrate_conc)
                row_ave['calculated_concentration_max']=max(broth_conc+filtrate_conc)
                row_ave['calculated_concentration_broth_min']=min(broth_conc)
                row_ave['calculated_concentration_broth_max']=max(broth_conc)
                row_ave['calculated_concentration_filtrate_min']=min(filtrate_conc)
                row_ave['calculated_concentration_filtrate_max']=max(filtrate_conc)
                data_ave.append(row_ave);

        # dump the chart parameters to a js file
        data1_keys = ['analysis_id',
                      'experiment_id',
                      'sample_name',
                      'sample_id',
                      'sample_name_abbreviation',
                      'component_group_name',
                      'component_name',
                      'calculated_concentration_units',
                      'extracellular_percent',
                      'calculated_concentration_cv'
                    ];
        data1_nestkeys = ['component_name'];
        data1_keymap = {'xdata':'component_name',
                        'ydata':'calculated_concentration',
                        #'ydatalb':'peakInfo_lb',
                        #'ydataub':'peakInfo_ub',
                        #'ydatamin':None,
                        #'ydatamax':None,
                        #'ydataiq1':None,
                        #'ydataiq3':None,
                        #'ydatamedian':None,
                        'serieslabel':'sample_name_abbreviation',
                        'featureslabel':'sample_name'};
        data2_keys = ['analysis_id',
                      'experiment_id',
                      'sample_name_abbreviation',
                      'time_point',
                      'component_group_name',
                      'component_name',
                      'calculated_concentration_units',
                      'extracellular_percent',
                      'calculated_concentration_broth_cv'
                    ];
        data2_nestkeys = ['component_name'];
        data2_keymap = {'xdata':'component_name',
                        'ydatamean':'calculated_concentration_broth_average',
                        'ydatalb':'calculated_concentration_broth_lb',
                        'ydataub':'calculated_concentration_broth_ub',
                        'ydatamin':'calculated_concentration_broth_min',
                        'ydatamax':'calculated_concentration_broth_max',
                        #'ydataiq1':None,
                        #'ydataiq3':None,
                        #'ydatamedian':None,
                        'serieslabel':'sample_name_abbreviation',
                        'featureslabel':'component_name'};
        data3_keys = ['analysis_id',
                      'experiment_id',
                      'sample_name_abbreviation',
                      'time_point',
                      'component_group_name',
                      'component_name',
                      'calculated_concentration_units',
                      'extracellular_percent',
                      'calculated_concentration_filtrate_cv',
                    ];
        data3_nestkeys = ['component_name'];
        data3_keymap = {'xdata':'component_name',
                        'ydatamean':'calculated_concentration_filtrate_average',
                        'ydatalb':'calculated_concentration_filtrate_lb',
                        'ydataub':'calculated_concentration_filtrate_ub',
                        'ydatamin':'calculated_concentration_filtrate_min',
                        'ydatamax':'calculated_concentration_filtrate_max',
                        #'ydataiq1':None,
                        #'ydataiq3':None,
                        #'ydatamedian':None,
                        'serieslabel':'sample_name_abbreviation',
                        'featureslabel':'component_name'};
        data4_keys = ['analysis_id',
                      'experiment_id',
                      'sample_name_abbreviation',
                      'time_point',
                      'component_group_name',
                      'component_name',
                      'calculated_concentration_units',
                      'extracellular_percent',
                      'calculated_concentration_cv'
                    ];
        data4_nestkeys = ['component_name'];
        data4_keymap = {'xdata':'component_name',
                        'ydata':'calculated_concentration_average',
                        'ydatamean':'calculated_concentration_average',
                        'ydatalb':'calculated_concentration_lb',
                        'ydataub':'calculated_concentration_ub',
                        #'ydatamin':'calculated_concentration_min',
                        #'ydatamax':'calculated_concentration_max',
                        #'ydataiq1':None,
                        #'ydataiq3':None,
                        #'ydatamedian':None,
                        'serieslabel':'sample_name_abbreviation',
                        'featureslabel':'component_name'};
        # make the data object
        dataobject_O = [{"data":data_norm_broth,"datakeys":data1_keys,"datanestkeys":data1_nestkeys},
                        {"data":data_norm_filtrate,"datakeys":data1_keys,"datanestkeys":data1_nestkeys},
                        {"data":data_norm_combined,"datakeys":data1_keys,"datanestkeys":data1_nestkeys},
                        {"data":data_ave,"datakeys":data2_keys,"datanestkeys":data2_nestkeys},
                        {"data":data_ave,"datakeys":data3_keys,"datanestkeys":data3_nestkeys},
                        {"data":data_ave,"datakeys":data4_keys,"datanestkeys":data4_nestkeys}];
        # make the tile parameter objects for the normalized and averages
        formtileparameters_averages_O = {'tileheader':'Filter menu averages','tiletype':'html','tileid':"filtermenu2",'rowid':"row1",'colid':"col1",
            'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-6"};
        formparameters_averages_O = {'htmlid':'filtermenuform2',"htmltype":'form_01',"formsubmitbuttonidtext":{'id':'submit2','text':'submit'},"formresetbuttonidtext":{'id':'reset2','text':'reset'},"formupdatebuttonidtext":{'id':'update2','text':'update'}};
        formtileparameters_averages_O.update(formparameters_averages_O);
        # make the svg objects for the averages data
        svgparameters_averages_broth_O = {"svgtype":'boxandwhiskersplot2d_02',"svgkeymap":[data2_keymap,data1_keymap],
                            'svgid':'svg4',
                            "svgmargin":{ 'top': 50, 'right': 150, 'bottom': 50, 'left': 50 },
                            "svgwidth":250,"svgheight":250,
                            "svgx1axislabel":"component_name","svgy1axislabel":"concentration",
        		'svgformtileid':'filtermenu2','svgresetbuttonid':'reset2','svgsubmitbuttonid':'submit2'};
        svgtileparameters_averages_broth_O = {'tileheader':'Broth data','tiletype':'svg','tileid':"tile4",'rowid':"row2",'colid':"col1",
            'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-4"};
        svgtileparameters_averages_broth_O.update(svgparameters_averages_broth_O);
        if data_norm_filtrate:
            svgparameters_averages_filtrate_O = {"svgtype":'boxandwhiskersplot2d_02',"svgkeymap":[data3_keymap,data1_keymap],
                            'svgid':'svg5',
                            "svgmargin":{ 'top': 50, 'right': 150, 'bottom': 50, 'left': 50 },
                            "svgwidth":250,"svgheight":250,
                            "svgx1axislabel":"component_name","svgy1axislabel":"concentration",
        		'svgformtileid':'filtermenu2','svgresetbuttonid':'reset2','svgsubmitbuttonid':'submit2'};
            svgtileparameters_averages_filtrate_O = {'tileheader':'Filtrate data','tiletype':'svg','tileid':"tile5",'rowid':"row2",'colid':"col2",
                'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-4"};
            svgtileparameters_averages_filtrate_O.update(svgparameters_averages_filtrate_O);
        else:
            svgparameters_averages_filtrate_O = {"svgtype":'boxandwhiskersplot2d_01',"svgkeymap":[data3_keymap],
                            'svgid':'svg5',
                            "svgmargin":{ 'top': 50, 'right': 150, 'bottom': 50, 'left': 50 },
                            "svgwidth":250,"svgheight":250,
                            "svgx1axislabel":"component_name","svgy1axislabel":"concentration",
        		'svgformtileid':'filtermenu2','svgresetbuttonid':'reset2','svgsubmitbuttonid':'submit2'};
            svgtileparameters_averages_filtrate_O = {'tileheader':'Filtrate data','tiletype':'svg','tileid':"tile5",'rowid':"row2",'colid':"col2",
                'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-4"};
            svgtileparameters_averages_filtrate_O.update(svgparameters_averages_filtrate_O);
        svgparameters_averages_combined_O = {
                            #"svgtype":'boxandwhiskersplot2d_02',
                            "svgtype":'boxandwhiskersplot2d_01',
                            #"svgkeymap":[data4_keymap,data1_keymap],
                            "svgkeymap":[data4_keymap],
                            'svgid':'svg6',
                            "svgmargin":{ 'top': 50, 'right': 150, 'bottom': 50, 'left': 50 },
                            "svgwidth":250,"svgheight":250,
                            "svgx1axislabel":"component_name","svgy1axislabel":"concentration",
    						'svgformtileid':'filtermenu2','svgresetbuttonid':'reset2','svgsubmitbuttonid':'submit2'};
        svgtileparameters_averages_combined_O = {'tileheader':'Broth-Filtrate data','tiletype':'svg','tileid':"tile6",'rowid':"row2",'colid':"col3",
            'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-4"};
        svgtileparameters_averages_combined_O.update(svgparameters_averages_combined_O);
        # make the tables for the normalized and averages data
        tableparameters_normalized_O = {"tabletype":'responsivetable_01',
                    'tableid':'table1',
                    "tablefilters":None,
                    "tableclass":"table  table-condensed table-hover",
    			    'tableformtileid':'filtermenu1','tableresetbuttonid':'reset1','tablesubmitbuttonid':'submit1'};
        tabletileparameters_normalized_O = {'tileheader':'normalized data','tiletype':'table','tileid':"tile7",'rowid':"row4",'colid':"col1",
            'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-12"};
        tabletileparameters_normalized_O.update(tableparameters_normalized_O);
        tableparameters_averages_O = {"tabletype":'responsivetable_01',
                    'tableid':'table2',
                    "tablefilters":None,
                    "tableclass":"table  table-condensed table-hover",
    			    'tableformtileid':'filtermenu2','tableresetbuttonid':'reset2','tablesubmitbuttonid':'submit2'};
        tabletileparameters_averages_O = {'tileheader':'averages data','tiletype':'table','tileid':"tile8",'rowid':"row5",'colid':"col1",
            'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-12"};
        tabletileparameters_averages_O.update(tableparameters_averages_O);
        parametersobject_O = [
            formtileparameters_averages_O,
            svgtileparameters_averages_broth_O,
            svgtileparameters_averages_filtrate_O,
            svgtileparameters_averages_combined_O,
            tabletileparameters_normalized_O,
            tabletileparameters_averages_O];
        tile2datamap_O = {
            "filtermenu2":[5],
            "tile4":[3,0],
            "tile5":[4,1],
            #"tile6":[5,2],
            "tile6":[5],
            "tile7":[2],
            "tile8":[5]
            };
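        ## NOTE: the indices in tile2datamap_O refer to positions in dataobject_O above:
        ## 0=broth, 1=filtrate, 2=combined normalized data; 3=broth, 4=filtrate,
        ## 5=combined averages.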
        #if data_norm_filtrate: tile2datamap_O.update({"tile5":[4,1]})
        #else: tile2datamap_O.update({"tile5":[4]})
        filtermenuobject_O = [
            #{"filtermenuid":"filtermenu1","filtermenuhtmlid":"filtermenuform1",
            #"filtermenusubmitbuttonid":"submit1","filtermenuresetbuttonid":"reset1",
            #"filtermenuupdatebuttonid":"update1"},
            {"filtermenuid":"filtermenu2","filtermenuhtmlid":"filtermenuform2",
            "filtermenusubmitbuttonid":"submit2","filtermenuresetbuttonid":"reset2",
            "filtermenuupdatebuttonid":"update2"}
                              ];
        #
        ddtutilities = ddt_container(parameters_I = parametersobject_O,data_I = dataobject_O,tile2datamap_I = tile2datamap_O,filtermenu_I = filtermenuobject_O);
        if data_dir_I=='tmp':
            filename_str = self.settings['visualization_data'] + '/tmp/ddt_data.js'
            with open(filename_str,'w') as file:
                file.write(ddtutilities.get_allObjects());
        elif data_dir_I=='data_json':
            data_json_O = ddtutilities.get_allObjects_js();
            return data_json_O;
    def execute_normalizeSamples2Biomass(self,experiment_id_I,biological_material_I=None,conversion_name_I=None,sample_names_I=[],component_names_I=[],use_height_I=False,sample_types_I=['Unknown']):
        '''Normalize calculated concentrations to measured biomass
         Input:
           experiment_id_I
           biological_material_I =  biological material (if None, no normalization is done)
           conversion_name_I = biomass conversion name (if None, no normalization is done)
           use_height_I = if True, use the ion count for peak height instead of the calculated_concentration or height/area ratio
         Output:
           sample_name
           sample_id
           component_group_name
           component_name
           calculated_concentration
           calculated_concentration_units
           used_
        '''

        data_O=[];
        calc = calculate_interface();
        
        print('execute_normalizeSamples2Biomass...')

        ##SPLIT 1:
        # get the unique sample_names/sample_ids/sample_types/component_names/component_group_names/calculated_concentration_units
        groupJoin = self.getGroupJoin_experimentAndQuantitationMethodAndMQResultsTable_experimentID_dataStage01QuantificationMQResultsTable(
            experiment_id_I,
            sample_types_I=sample_types_I,
            sample_names_I=sample_names_I,
            component_names_I=component_names_I,
            sample_ids_I=[],
            );
        if isinstance(groupJoin, listDict):
            groupJoin.convert_dataFrame2ListDict()
            groupJoin = groupJoin.get_listDict();
        if (biological_material_I and conversion_name_I):
            # get the conversion units once
            conversion = None;
            conversion_units = None;
            conversion, conversion_units = self.get_conversionAndConversionUnits_biologicalMaterialAndConversionName(biological_material_I,conversion_name_I);
            for row_cnt,row in enumerate(groupJoin):
                print('normalizing samples2Biomass for component_name ' + row['component_name']);
                # get physiological parameters
                cvs = None;
                cvs_units = None;
                od600 = None;
                dil = None;
                dil_units = None;
                cvs, cvs_units, od600, dil,dil_units = self.get_CVSAndCVSUnitsAndODAndDilAndDilUnits_sampleName(row['sample_name']);
                if not(cvs and cvs_units and od600 and dil and dil_units):
                    print('cvs, cvs_units, or od600 are missing from the physiological parameters');
                    print('or dil and dil_units are missing from the sample description');
                    exit(-1);
                elif not(conversion and conversion_units):
                    print('the biological_material or conversion_name is incorrect');
                    exit(-1);
                else:
                    #calculate the cell volume or biomass depending on the conversion units
                    #cell_volume, cell_volume_units = calc.calculate_cellVolume_CVSAndCVSUnitsAndODAndConversionAndConversionUnits(cvs,cvs_units,od600,conversion,conversion_units);
                    cell_volume, cell_volume_units = calc.calculate_biomass_CVSAndCVSUnitsAndODAndConversionAndConversionUnits(cvs,cvs_units,od600,conversion,conversion_units);
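                ## NOTE (illustrative sketch): the conversion presumably scales the culture
                ## volume sampled (CVS) and the OD600 by a biomass conversion factor, e.g.
                ##   biomass = cvs*od600*conversion; #e.g. [mL]*[OD600]*[gDCW*L-1*OD600-1]; actual units depend on cvs_units and conversion_units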
                # get the calculated concentration
                calc_conc = None;
                calc_conc_units = None;
                #data_row = self.get_row_sampleNameAndComponentName(
                #    row['sample_name'],
                #    row['component_name']);
                if use_height_I: 
                    #calc_conc, calc_conc_units = data_row['height'],'height';
                    calc_conc, calc_conc_units = row['height'],'height';
                elif row['use_calculated_concentration']:
                    #calc_conc, calc_conc_units = data_row['calculated_concentration'],data_row['conc_units'];
                    calc_conc, calc_conc_units = row['calculated_concentration'],row['conc_units'];
                elif not row['use_calculated_concentration'] and row['use_area']:
                    #calc_conc, calc_conc_units = data_row['area_ratio'],'area_ratio';
                    calc_conc, calc_conc_units = row['area_ratio'],'area_ratio';
                elif not row['use_calculated_concentration'] and not row['use_area']:
                    #calc_conc, calc_conc_units = data_row['height_ratio'],'height_ratio';
                    calc_conc, calc_conc_units = row['height_ratio'],'height_ratio';
                # calculate the normalized concentration
                norm_conc = None;
                norm_conc_units = None;
                if calc_conc: 
                    norm_conc, norm_conc_units = calc.calculate_conc_concAndConcUnitsAndDilAndDilUnitsAndConversionAndConversionUnits(calc_conc,calc_conc_units,dil,dil_units,cell_volume, cell_volume_units);
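                ## NOTE (illustrative sketch): the normalization presumably divides the
                ## dilution-corrected amount by the biomass, e.g.
                ##   norm_conc = calc_conc*dil/cell_volume; #e.g. mM -> umol*gDCW-1; actual units depend on calc_conc_units and cell_volume_units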
                # update data_stage01_quantification_normalized
                if norm_conc:
                    data_row = {'experiment_id':experiment_id_I, #output row; a new name avoids shadowing the query row
                            'sample_name':row['sample_name'],
                            'sample_id':row['sample_id'],
                            'component_group_name':row['component_group_name'],
                            'component_name':row['component_name'],
                            'calculated_concentration':norm_conc,
                            'calculated_concentration_units':norm_conc_units,
                            'used_':True,};
                    data_O.append(data_row);
        else:
            for row_cnt,row in enumerate(groupJoin):
                print('normalizing samples2Biomass for sample_name ' + row['sample_name'] + ' and component_name ' + row['component_name']);
                # get the calculated concentration
                calc_conc = None;
                calc_conc_units = None;
                #data_row = self.get_row_sampleNameAndComponentName(
                #    row['sample_name'],
                #    row['component_name']);
                if use_height_I: 
                    #calc_conc, calc_conc_units = data_row['height'],'height';
                    calc_conc, calc_conc_units = row['height'],'height';
                elif row['use_calculated_concentration']:
                    #calc_conc, calc_conc_units = data_row['calculated_concentration'],data_row['conc_units'];
                    calc_conc, calc_conc_units = row['calculated_concentration'],row['conc_units'];
                elif not row['use_calculated_concentration'] and row['use_area']:
                    #calc_conc, calc_conc_units = data_row['area_ratio'],'area_ratio';
                    calc_conc, calc_conc_units = row['area_ratio'],'area_ratio';
                elif not row['use_calculated_concentration'] and not row['use_area']:
                    #calc_conc, calc_conc_units = data_row['height_ratio'],'height_ratio';
                    calc_conc, calc_conc_units = row['height_ratio'],'height_ratio';
                # add data to the DB
                if calc_conc: 
                    data_row = {'experiment_id':experiment_id_I, #output row; a new name avoids shadowing the query row
                            'sample_name':row['sample_name'],
                            'sample_id':row['sample_id'],
                            'component_group_name':row['component_group_name'],
                            'component_name':row['component_name'],
                            'calculated_concentration':calc_conc,
                            'calculated_concentration_units':calc_conc_units,
                            'used_':True,};
                    data_O.append(data_row);

        ##SPLIT 2:
        ## get sample names
        #sample_names = [];
        #sample_ids = [];
        #for st in sample_types_I:
        #    sample_names_tmp = [];
        #    sample_ids_tmp = [];
        #    #sample_names_tmp = self.get_sampleNames_experimentIDAndSampleType(experiment_id_I,st);
        #    sample_names_tmp,sample_ids_tmp = self.get_sampleNamesAndSampleIDs_experimentIDAndSampleType(experiment_id_I,st);
        #    sample_names.extend(sample_names_tmp);
        #    sample_ids.extend(sample_ids_tmp); 
        #if sample_names_I:
        #    sample_names_ind = [i for i,x in enumerate(sample_names) if x in sample_names_I];
        #    sample_names_cpy = copy.copy(sample_names);
        #    sample_ids = copy.copy(sample_ids);
        #    sample_names = [x for i,x in enumerate(sample_names) if i in sample_names_ind]
        #    sample_ids = [x for i,x in enumerate(sample_ids) if i in sample_names_ind]
        ## create database table
        #for sn_cnt,sn in enumerate(sample_names):
        #    print('normalizing samples2Biomass for sample_name ' + sn);
        #    # get component names
        #    component_names = [];
        #    component_group_names = [];
        #    #component_names = self.get_componentsNames_experimentIDAndSampleName(experiment_id_I,sn);
        #    component_names,component_group_names = self.get_componentsNamesAndComponentGroupNames_experimentIDAndSampleName(experiment_id_I,sn);
        #    if component_names_I:                
        #        component_names_ind = [i for i,x in enumerate(component_names) if x in component_names_I];
        #        component_names_cpy = copy.copy(component_names);
        #        component_group_names = copy.copy(component_group_names);
        #        component_names = [x for i,x in enumerate(component_names) if i in component_names_ind]
        #        component_group_names = [x for i,x in enumerate(component_group_names) if i in component_names_ind]
        #    ## get sample id
        #    #sample_id = self.get_sampleID_experimentIDAndSampleName(experiment_id_I,sn);
        #    if (biological_material_I and conversion_name_I):
        #        # get physiological parameters
        #        cvs = None;
        #        cvs_units = None;
        #        od600 = None;
        #        dil = None;
        #        dil_units = None;
        #        conversion = None;
        #        conversion_units = None;
        #        cvs, cvs_units, od600, dil,dil_units = self.get_CVSAndCVSUnitsAndODAndDilAndDilUnits_sampleName(sn);
        #        conversion, conversion_units = self.get_conversionAndConversionUnits_biologicalMaterialAndConversionName(biological_material_I,conversion_name_I);
        #        if not(cvs and cvs_units and od600 and dil and dil_units):
        #            print('cvs, cvs_units, or od600 are missing from physiological parameters');
        #            print('or dil and dil_units are missing from sample descripton');
        #            exit(-1);
        #        elif not(conversion and conversion_units):
        #            print('biological_material or conversion name is incorrect');
        #            exit(-1);  
        #        else:
        #            #calculate the cell volume or biomass depending on the conversion units
        #            #cell_volume, cell_volume_units = calc.calculate_cellVolume_CVSAndCVSUnitsAndODAndConversionAndConversionUnits(cvs,cvs_units,od600,conversion,conversion_units);
        #            cell_volume, cell_volume_units = calc.calculate_biomass_CVSAndCVSUnitsAndODAndConversionAndConversionUnits(cvs,cvs_units,od600,conversion,conversion_units);
        #        for cn_cnt,cn in enumerate(component_names):
        #            print('normalizing samples2Biomass for component_name ' + cn);
        #            # get component group name
        #            #component_group_name = self.get_componentGroupName_experimentIDAndComponentName(experiment_id_I,cn);
        #            #component_group_name = self.get_msGroup_componentName_MSComponents(cn);
        #            # get the calculated concentration
        #            calc_conc = None;
        #            calc_conc_units = None;
        #            if use_height_I: 
        #                calc_conc, calc_conc_units = self.get_peakHeight_sampleNameAndComponentName(sn,cn);
        #            else:
        #                calc_conc, calc_conc_units = self.get_concAndConcUnits_sampleNameAndComponentName(sn,cn);
        #            # calculate the normalized concentration
        #            norm_conc = None;
        #            norm_conc_units = None;
        #            if calc_conc: 
        #                norm_conc, norm_conc_units = calc.calculate_conc_concAndConcUnitsAndDilAndDilUnitsAndConversionAndConversionUnits(calc_conc,calc_conc_units,dil,dil_units,cell_volume, cell_volume_units);
        #            # update data_stage01_quantification_normalized
        #            if norm_conc:
        #                row = {'experiment_id':experiment_id_I,
        #                        'sample_name':sn,
        #                        'sample_id':sample_ids[sn_cnt],
        #                        'component_group_name':component_group_names[cn_cnt],
        #                        'component_name':cn,
        #                        'calculated_concentration':norm_conc,
        #                        'calculated_concentration_units':norm_conc_units,
        #                        'used_':True,};
        #                data_O.append(row);
        #    else:
        #        for cn_cnt,cn in enumerate(component_names):
        #            print('normalizing samples2Biomass for component_name ' + cn);
        #            # get component group name
        #            #component_group_name = self.get_componentGroupName_experimentIDAndComponentName(experiment_id_I,cn);
        #            #component_group_name = self.get_msGroup_componentName_MSComponents(cn);
        #            # get the calculated concentration
        #            calc_conc = None;
        #            calc_conc_units = None;
        #            if use_height_I: 
        #                calc_conc, calc_conc_units = self.get_peakHeight_sampleNameAndComponentName(sn,cn);
        #            else:
        #                calc_conc, calc_conc_units = self.get_concAndConcUnits_sampleNameAndComponentName(sn,cn);
        #            # add data to the DB
        #            row = {'experiment_id':experiment_id_I,
        #                    'sample_name':sn,
        #                    'sample_id':sample_ids[sn_cnt],
        #                    'component_group_name':component_group_names[cn_cnt],
        #                    'component_name':cn,
        #                    'calculated_concentration':calc_conc,
        #                    'calculated_concentration_units':calc_conc_units,
        #                    'used_':True,};
        #            data_O.append(row);

        self.add_rows_table('data_stage01_quantification_normalized',data_O);
    def execute_analyzePeakInformation(self,experiment_id_I,sample_names_I=[],
                            sample_types_I=['Standard'],
                            component_names_I=[],
                            peakInfo_I = ['height','retention_time','width_at_50','signal_2_noise'],
                            acquisition_date_and_time_I=[None,None]):
        '''Analyze retention-time, height, s/n, and asymmetry
        INPUT:
        experiment_id_I
        sample_names_I
        sample_types_I
        component_names_I
        peakInfo_I
        acquisition_date_and_time_I = ['%m/%d/%Y %H:%M','%m/%d/%Y %H:%M']
        '''

        print('execute_analyzePeakInformation...')
        
        #convert string date time to datetime
        # e.g. time.strptime('4/15/2014 15:51','%m/%d/%Y %H:%M')
        acquisition_date_and_time = [];
        if acquisition_date_and_time_I and acquisition_date_and_time_I[0] and acquisition_date_and_time_I[1]:
            for dateandtime in acquisition_date_and_time_I:
                time_struct = strptime(dateandtime,'%m/%d/%Y %H:%M')
                dt = datetime.fromtimestamp(mktime(time_struct))
                acquisition_date_and_time.append(dt);
        else: acquisition_date_and_time=[None,None]
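        ## NOTE (illustrative sketch): the strptime/mktime round-trip above can be done
        ## in a single call on the datetime class:
        ##   dt = datetime.strptime(dateandtime,'%m/%d/%Y %H:%M')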
        data_O = [];
        component_names_all = [];
        # get sample names
        if sample_names_I and sample_types_I and len(sample_types_I)==1:
            sample_names = sample_names_I;
            sample_types = [sample_types_I[0] for sn in sample_names];
        else:
            sample_names = [];
            sample_types = [];
            for st in sample_types_I:
                sample_names_tmp = [];
                sample_names_tmp = self.get_sampleNames_experimentIDAndSampleType(experiment_id_I,st);
                sample_names.extend(sample_names_tmp);
                sample_types_tmp = [];
                sample_types_tmp = [st for sn in sample_names_tmp];
                sample_types.extend(sample_types_tmp);
        print(str(len(sample_names)) + ' total samples');
        for sn in sample_names:
            print('analyzing peakInformation for sample_name ' + sn);
            # get sample description
            desc = {};
            desc = self.get_description_experimentIDAndSampleID_sampleDescription(experiment_id_I,sn);
            # get component names
            if component_names_I:
                component_names = component_names_I;
            else:
                component_names = [];
                component_names = self.get_componentsNames_experimentIDAndSampleName(experiment_id_I,sn);
            component_names_all.extend(component_names);
            for cn in component_names:
                # get rt, height, s/n
                sst_data = {};
                sst_data = self.get_peakInfo_sampleNameAndComponentName(sn,cn,acquisition_date_and_time);
                if sst_data:
                    tmp = {};
                    tmp.update(sst_data);
                    tmp.update(desc);
                    tmp.update({'sample_name':sn});
                    data_O.append(tmp);
        #TODO:
        # 1. make a calculation method
        # calculate statistics for specific parameters
        data_add = [];
        component_names_unique = list(set(component_names_all));
        component_names_unique.sort();
        # math utilities
        from math import sqrt
        calc = calculate_interface();
        for cn in component_names_unique:
            data_parameters = {};
            data_parameters_stats = {};
            for parameter in peakInfo_I:
                data_parameters[parameter] = [];
                data_parameters_stats[parameter] = {'ave':None,'var':None,'cv':None,'lb':None,'ub':None};
                acquisition_date_and_times = [];
                sample_names_parameter = [];
                sample_types_parameter = [];
                component_group_name = None;
                for sn_cnt,sn in enumerate(sample_names):
                    for d in data_O:
                        if d['sample_name'] == sn and d['component_name'] == cn and d[parameter]:
                            data_parameters[parameter].append(d[parameter]);
                            acquisition_date_and_times.append(d['acquisition_date_and_time'])
                            sample_names_parameter.append(sn);
                            sample_types_parameter.append(sample_types[sn_cnt])
                            component_group_name = d['component_group_name'];
                ave,var,lb,ub = None,None,None,None;
                if len(data_parameters[parameter])>1: ave,var,lb,ub = calc.calculate_ave_var(data_parameters[parameter]);
                if ave:
                    cv = sqrt(var)/ave*100;
                    data_parameters_stats[parameter] = {'ave':ave,'var':var,'cv':cv,'lb':lb,'ub':ub};
                    # add data to the DB
                    row = {'experiment_id':experiment_id_I,
                        'component_group_name':component_group_name,
                        'component_name':cn,
                        'peakInfo_parameter':parameter,
                        'peakInfo_ave':data_parameters_stats[parameter]['ave'],
                        'peakInfo_cv':data_parameters_stats[parameter]['cv'],
                        'peakInfo_lb':data_parameters_stats[parameter]['lb'],
                        'peakInfo_ub':data_parameters_stats[parameter]['ub'],
                        'peakInfo_units':None,
                        'sample_names':sample_names_parameter,
                        'sample_types':sample_types_parameter,
                        'acqusition_date_and_times':acquisition_date_and_times, #key spelling kept as-is (presumably matches the database column)
                        'peakInfo_data':data_parameters[parameter],
                        'used_':True,
                        'comment_':None,};
                    data_add.append(row);
        self.add_rows_table('data_stage01_quantification_peakInformation',data_add);
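        ## NOTE (illustrative sketch): calculate_ave_var presumably returns the mean,
        ## sample variance, and 95% confidence bounds of the replicates, e.g. (assuming
        ## scipy is available):
        ##   from scipy import stats
        ##   ave = numpy.mean(data); var = numpy.var(data,ddof=1);
        ##   lb,ub = stats.t.interval(0.95,len(data)-1,loc=ave,scale=numpy.sqrt(var/len(data)));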
    def execute_analyzeAverages(self,experiment_id_I,sample_name_abbreviations_I=[],sample_names_I=[],component_names_I=[]):
        '''calculate the averages using the formula ave(broth)_i - ave(filtrate)_i for each component i
        NOTE: data_stage01_quantification_normalized must be populated
        Input:
        experiment_id_I
        sample_name_abbreviations_I
        sample_names_I
        component_names_I
        Output:
        sample_name_abbreviation
        component_group_name
        component_name
        concentration average
        concentration CV
        concentration units
        % extracellular
        '''

        data_O=[];
        calc = calculate_interface();
        
        print('execute_analyzeAverages...')
        # get sample_name_abbreviations
        if sample_name_abbreviations_I:
            sample_name_abbreviations = sample_name_abbreviations_I
        else:
            sample_name_abbreviations = [];
            sample_name_abbreviations = self.get_sampleNameAbbreviations_experimentID_dataStage01Normalized(experiment_id_I);
        for sna in sample_name_abbreviations:
            print('analyzing averages for sample_name_abbreviation ' + sna);
            # get component names
            if component_names_I:
                component_names = component_names_I
            else:
                component_names = [];
                component_names = self.get_componentsNames_experimentIDAndSampleNameAbbreviation_dataStage01Normalized(experiment_id_I,sna);
            for cn in component_names:
                print('analyzing averages for component_name ' + cn);
                component_group_name = self.get_componentGroupName_experimentIDAndComponentName_dataStage01Normalized(experiment_id_I,cn);
                # get time points
                time_points = self.get_timePoint_experimentIDAndSampleNameAbbreviation_dataStage01Normalized(experiment_id_I,sna);
                if not time_points: continue;
                for tp in time_points:
                    print('analyzing averages for time_point ' + tp);
                    # get filtrate sample names
                    sample_names = [];
                    sample_description = 'Filtrate';
                    sample_names = self.get_sampleNames_experimentIDAndSampleNameAbbreviationAndSampleDescriptionAndComponentNameAndTimePoint_dataStage01Normalized(experiment_id_I,sna,sample_description,cn,tp);
                    if sample_names_I: # screen out sample names that are not in the input
                        sample_names = [x for x in sample_names if x in sample_names_I];
                    concs = [];
                    conc_units = None;
                    for sn in sample_names:
                        # concentrations and units
                        conc = None;
                        conc_unit = None;
                        conc, conc_unit = self.get_concAndConcUnits_sampleNameAndComponentName_dataStage01Normalized(sn,cn);
                        if (not(conc) or conc==0): continue
                        if (conc_unit): conc_units = conc_unit;
                        concs.append(conc);
                    n_replicates_filtrate = len(concs);
                    conc_average_filtrate = 0.0;
                    conc_var_filtrate = 0.0;
                    conc_cv_filtrate = 0.0;
                    # calculate average and CV of concentrations
                    if (not(concs)): 
                        conc_average_filtrate = 0;
                        conc_var_filtrate = 0;
                    elif n_replicates_filtrate<2: 
                        conc_average_filtrate = concs[0];
                        conc_var_filtrate = 0;
                    else: 
                        #conc_average_filtrate, conc_var_filtrate = calc.calculate_ave_var_R(concs);
                        conc_average_filtrate = numpy.mean(numpy.array(concs));
                        conc_var_filtrate = numpy.var(numpy.array(concs));
                        if (conc_average_filtrate <= 0): conc_cv_filtrate = 0;
                        else: conc_cv_filtrate = sqrt(conc_var_filtrate)/conc_average_filtrate*100; 
                    # get broth sample names
                    sample_names = [];
                    sample_description = 'Broth';
                    sample_names = self.get_sampleNames_experimentIDAndSampleNameAbbreviationAndSampleDescriptionAndComponentNameAndTimePoint_dataStage01Normalized(experiment_id_I,sna,sample_description,cn,tp);
                    if sample_names_I: # screen out sample names that are not in the input
                        sample_names = [x for x in sample_names if x in sample_names_I];
                    concs = [];
                    conc_units = None;
                    for sn in sample_names:
                        print('analyzing averages for sample_name ' + sn);
                        # query concentrations and units
                        conc = None;
                        conc_unit = None;
                        conc, conc_unit = self.get_concAndConcUnits_sampleNameAndComponentName_dataStage01Normalized(sn,cn);
                        if (not(conc) or conc==0): continue
                        if (conc_unit): conc_units = conc_unit;
                        concs.append(conc);
                    n_replicates = len(concs);
                    conc_average_broth = 0.0;
                    conc_var_broth = 0.0;
                    conc_cv_broth = 0.0;
                    # calculate average and CV of concentrations
                    if (not(concs)): 
                        continue
                    elif n_replicates<2: 
                        continue
                    else: 
                        #conc_average_broth, conc_var_broth = calc.calculate_ave_var_R(concs);
                        conc_average_broth = numpy.mean(numpy.array(concs));
                        conc_var_broth = numpy.var(numpy.array(concs));
                        if (conc_average_broth <= 0): conc_cv_broth = 0;
                        else: conc_cv_broth = sqrt(conc_var_broth)/conc_average_broth*100; 
                    # calculate average and CV
                    conc_average = 0.0;
                    conc_var = 0.0;
                    conc_cv = 0.0;
                    conc_average = conc_average_broth-conc_average_filtrate;
                    if (conc_average < 0): conc_average = 0;
                    conc_var = conc_var_broth + conc_var_filtrate;
                    if (conc_average <= 0): conc_cv = 0;
                    else: conc_cv = sqrt(conc_var)/conc_average*100;
                    # calculate the % extracellular
                    extracellular_percent = conc_average_filtrate/conc_average_broth*100;
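                    ## NOTE: the combined variance above assumes independent errors,
                    ## Var(broth - filtrate) = Var(broth) + Var(filtrate); the filtrate
                    ## reflects the extracellular medium, so filtrate/broth*100 estimates
                    ## the percentage of the measured metabolite that is extracellular.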
                    # add data to the session
                    row = {};
                    row = {'experiment_id':experiment_id_I,
                        'sample_name_abbreviation':sna,
                        'time_point':tp,
                        'component_group_name':component_group_name,
                        'component_name':cn,
                        'n_replicates_broth':n_replicates,
                        'calculated_concentration_broth_average':conc_average_broth,
                        'calculated_concentration_broth_cv':conc_cv_broth,
                        'n_replicates_filtrate':n_replicates_filtrate,
                        'calculated_concentration_filtrate_average':conc_average_filtrate,
                        'calculated_concentration_filtrate_cv':conc_cv_filtrate,
                        'n_replicates':n_replicates,
                        'calculated_concentration_average':conc_average,
                        'calculated_concentration_cv':conc_cv,
                        'calculated_concentration_units':conc_units,
                        'extracellular_percent':extracellular_percent,
                        'used_':True,};
                    data_O.append(row)
        self.add_rows_table('data_stage01_quantification_averages',data_O);
    def execute_analyzeAverages_blanks(self,experiment_id_I,
            sample_name_abbreviations_I=[],
            sample_names_I=[],
            component_names_I=[],
            blank_sample_names_I=[],
            blank_sample_name_abbreviations_I=[],
            ):
        '''calculate the averages using the formula ave(broth)_i - ave(blank)_i for each component i, where blanks take the place of the filtrate
        NOTE: data_stage01_quantification_normalized must be populated
        Input:
        experiment_id_I
        sample_name_abbreviations_I
        sample_names_I
        component_names_I
        blank_sample_names_I = []; if specified, specific blank samples will be used as the filtrate instead of filtrate samples
        Output:
        sample_name_abbreviation
        component_group_name
        component_name
        concentration average
        concentration CV
        concentration units
        % extracellular
        '''

        data_O=[];
        calc = calculate_interface();
        
        print('execute_analyzeAverages_blanks...')
        #SPLIT 1:
        #1 query unique calculated_concentration_units/sample_name_abbreviations/component_names/component_group_names/time_points/sample_names/sample_ids/sample_description
        uniqueRows_all = self.getQueryResult_groupNormalizedAveragesSamples_experimentID_dataStage01QuantificationNormalizedAndAverages_limsSampleAndSampleID(
                experiment_id_I
            );
        #2 filter in broth samples
        uniqueRows = self.filter_groupNormalizedAveragesSamples_experimentID_dataStage01QuantificationNormalizedAndAverages_limsSampleAndSampleID(
                uniqueRows_all,
                calculated_concentration_units_I=[],
                component_names_I=component_names_I,
                component_group_names_I=[],
                sample_names_I=sample_names_I,
                sample_name_abbreviations_I=sample_name_abbreviations_I,
                time_points_I=[],
            );
        if isinstance(uniqueRows, listDict):
            uniqueRows.convert_dataFrame2ListDict()
            uniqueRows = uniqueRows.get_listDict();
        data_tmp = {};#reorganize the data into a dictionary for quick traversal of the replicates
        for uniqueRow_cnt,uniqueRow in enumerate(uniqueRows):
            unique = (uniqueRow['sample_name_abbreviation'],
                      uniqueRow['experiment_id'],
                      uniqueRow['time_point'],
                      uniqueRow['component_name'],
                      uniqueRow['calculated_concentration_units'])
            if unique not in data_tmp:
                data_tmp[unique] = [];
            data_tmp[unique].append(uniqueRow);
            
        #3 filter in blank samples
        uniqueBlanks=[];
        if blank_sample_names_I or blank_sample_name_abbreviations_I:
            uniqueBlanks = self.filter_groupNormalizedAveragesSamples_experimentID_dataStage01QuantificationNormalizedAndAverages_limsSampleAndSampleID(
                uniqueRows_all,
                calculated_concentration_units_I=[],
                component_names_I=component_names_I,
                component_group_names_I=[],
                sample_names_I=blank_sample_names_I,
                sample_name_abbreviations_I=blank_sample_name_abbreviations_I,
                time_points_I=[],
                );
        if isinstance(uniqueBlanks, listDict):
            uniqueBlanks.convert_dataFrame2ListDict()
            uniqueBlanks = uniqueBlanks.get_listDict();
        data_blanks_tmp = {}; #reorganize the data for a quick traversal of the components
        for uniqueBlanks_cnt,uniqueBlank in enumerate(uniqueBlanks):
            unique = uniqueBlank['component_name']
            if unique not in data_blanks_tmp:
                data_blanks_tmp[unique] = [];
            data_blanks_tmp[unique].append(uniqueBlank);

        #4 iterate through each unique calculated_concentration_units/sample_name_abbreviations/component_names/component_group_names/time_points
        # and determine the ave, cv, etc., after subtracting out the blanks
        for unique,replicates in data_tmp.items():
            print('analyzing averages for sample_name_abbreviation ' + replicates[0]['sample_name_abbreviation'] + ' and component_name ' + replicates[0]['component_name']);
            # get blank concentrations
            if data_blanks_tmp and replicates[0]['component_name'] in data_blanks_tmp.keys():
                concs = [d['calculated_concentration'] for d in data_blanks_tmp[replicates[0]['component_name']]
                         if not d['calculated_concentration'] is None and d['calculated_concentration']!=0];
                conc_units = [d['calculated_concentration_units'] for d in data_blanks_tmp[replicates[0]['component_name']]
                         if not d['calculated_concentration'] is None and d['calculated_concentration']!=0];
                if conc_units: conc_units = conc_units[0];
                else: conc_units = None;
            else:
                concs = [];
                conc_units = None;

            #if blank_sample_names_I:
            #    sample_names = blank_sample_names_I;
            #else:
            #    sample_names = [];
            #concs = [];
            #conc_units = None;
            #for sn in sample_names:
            #    # concentrations and units
            #    conc = None;
            #    conc_unit = None;
            #    conc, conc_unit = self.get_concAndConcUnits_sampleNameAndComponentName_dataStage01Normalized(
            #        sn,
            #        replicates[0]['component_name']);
            #    if (not(conc) or conc==0): continue
            #    if (conc_unit): conc_units = conc_unit;
            #    concs.append(conc);
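            # the blank concentrations (if any) take the place of the filtrate in the
            # average, CV, and %extracellular calculations below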
            n_replicates_filtrate = len(concs);
            conc_average_filtrate = 0.0;
            conc_var_filtrate = 0.0;
            conc_cv_filtrate = 0.0;
            # calculate average and CV of concentrations
            if (not(concs)): 
                conc_average_filtrate = 0;
                conc_var_filtrate = 0;
            elif n_replicates_filtrate<2: 
                conc_average_filtrate = concs[0];
                conc_var_filtrate = 0;
            else: 
                #conc_average_filtrate, conc_var_filtrate = calc.calculate_ave_var_R(concs);
                conc_average_filtrate = numpy.mean(numpy.array(concs));
                conc_var_filtrate = numpy.var(numpy.array(concs));
                if (conc_average_filtrate <= 0): conc_cv_filtrate = 0;
                else: conc_cv_filtrate = sqrt(conc_var_filtrate)/conc_average_filtrate*100; 
            # get broth concentrations
            concs = [d['calculated_concentration'] for d in replicates if d['sample_desc']=='Broth'
                     and not d['calculated_concentration'] is None and d['calculated_concentration']!=0];
            conc_units = [d['calculated_concentration_units'] for d in replicates if d['sample_desc']=='Broth'
                     and not d['calculated_concentration'] is None and d['calculated_concentration']!=0];
            if conc_units: conc_units = conc_units[0];
            else: conc_units = None;
            #concs = [];
            #conc_units = None;
            #sample_names = [d['sample_name'] for d in replicates if d['sample_desc']=='Broth'];
            #for sn in sample_names:
            #    # query concentrations and units
            #    conc = None;
            #    conc_unit = None;
            #    conc, conc_unit = self.get_concAndConcUnits_sampleNameAndComponentName_dataStage01Normalized(
            #        sn,
            #        replicates[0]['component_name']);
            #    if (not(conc) or conc==0): continue
            #    if (conc_unit): conc_units = conc_unit;
            #    concs.append(conc);
            n_replicates = len(concs);
            conc_average_broth = 0.0;
            conc_var_broth = 0.0;
            conc_cv_broth = 0.0;
            # calculate average and CV of concentrations
            if (not(concs)): 
                continue
            elif n_replicates<2: 
                continue
            else: 
                #conc_average_broth, conc_var_broth = calc.calculate_ave_var_R(concs);
                conc_average_broth = numpy.mean(numpy.array(concs));
                conc_var_broth = numpy.var(numpy.array(concs));
                if (conc_average_broth <= 0): conc_cv_broth = 0;
                else: conc_cv_broth = sqrt(conc_var_broth)/conc_average_broth*100; 
            # calculate average and CV
            conc_average = 0.0;
            conc_var = 0.0;
            conc_cv = 0.0;
            conc_average = conc_average_broth-conc_average_filtrate;
            if (conc_average < 0): conc_average = 0;
            conc_var = conc_var_broth + conc_var_filtrate;
            if (conc_average <= 0): conc_cv = 0;
            else: conc_cv = sqrt(conc_var)/conc_average*100;
            # calculate the % extracellular
            extracellular_percent = conc_average_filtrate/conc_average_broth*100;
            # add data to the session
            row = {};
            row = {'experiment_id':experiment_id_I,
                    'sample_name_abbreviation':replicates[0]['sample_name_abbreviation'],
                    'time_point':replicates[0]['time_point'],
                    'component_group_name':replicates[0]['component_group_name'],
                    'component_name':replicates[0]['component_name'],
                    'n_replicates_broth':n_replicates,
                    'calculated_concentration_broth_average':conc_average_broth,
                    'calculated_concentration_broth_cv':conc_cv_broth,
                    'n_replicates_filtrate':n_replicates_filtrate,
                    'calculated_concentration_filtrate_average':conc_average_filtrate,
                    'calculated_concentration_filtrate_cv':conc_cv_filtrate,
                    'n_replicates':n_replicates,
                    'calculated_concentration_average':conc_average,
                    'calculated_concentration_cv':conc_cv,
                    'calculated_concentration_units':conc_units,
                    'extracellular_percent':extracellular_percent,
                    'used_':True,};
            data_O.append(row);


        self.add_rows_table('data_stage01_quantification_averages',data_O);
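# A minimal standalone sketch of the broth-minus-filtrate statistics computed
# above; subtract_filtrate_background is a hypothetical helper name, not part
# of this class.
from math import sqrt
import numpy

def subtract_filtrate_background(broth_concs, filtrate_concs):
    """Return (average, variance, %CV) of the broth concentrations corrected
    for the filtrate (extracellular) background."""
    ave = numpy.mean(broth_concs) - numpy.mean(filtrate_concs)
    ave = max(ave, 0.0)  # negative differences are clamped to zero
    var = numpy.var(broth_concs) + numpy.var(filtrate_concs)  # variances add under subtraction
    cv = sqrt(var)/ave*100 if ave > 0 else 0.0  # %CV is reported as 0 at a zero average
    return ave, var, cv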
 def execute_calculateRatesAverages(self,experiment_id_I,sample_name_abbreviations_I=[],met_ids_I=[]):
     '''Calculate the average rates based on the rates of the replicates'''
     
     calc = calculate_interface();
     data_O = [];
     #query sample_name abbreviations
     print('execute calculate rates averages...')
     if sample_name_abbreviations_I:
         sample_name_abbreviations = sample_name_abbreviations_I;
     else:
         sample_name_abbreviations = [];
         sample_name_abbreviations = self.get_sampleNameAbbreviations_experimentID_dataStage01PhysiologyRates(experiment_id_I,6);
     for sna in sample_name_abbreviations:
         print('calculating rates averages for sample_name_abbreviation ' + sna);
         #query met_ids
         if met_ids_I:
             met_ids = met_ids_I;
         else:
             met_ids = [];
             met_ids = self.get_metIDs_experimentIDAndSampleNameAbbreviation_dataStage01PhysiologyRates(experiment_id_I,6,sna)
         for met in met_ids:
             print('calculating rates averages for met_id ' + met);
             #query sample names
             sample_name_short = [];
             sample_name_short = self.get_sampleNameShort_experimentIDAndSampleNameAbbreviationAndMetID_dataStage01PhysiologyRates(experiment_id_I,6,sna,met)
             slopes, intercepts, rates, rates_units, std_errs = [],[],[],[],[];
             for sns in sample_name_short:
                 #query slope, intercept, and rate
                 slope, intercept, r2, rate, rate_units, p_value, std_err = None,None,None,None,None,None,None;
                 slope, intercept, r2, rate, rate_units, p_value, std_err = self.get_rateData_experimentIDAndSampleNameShortAndMetID_dataStage01PhysiologyRates(experiment_id_I,sns,met);
                 if rate:
                     slopes.append(slope);
                     intercepts.append(intercept);
                     rates.append(rate);
                     rates_units.append(rate_units);
                     std_errs.append(std_err);
             #calculate the average, variance, and 95% confidence intervals
             n = len(rates);
             slopes_ave, slopes_var, slopes_lb, slopes_ub = None,None,None,None;
             intercepts_ave, intercepts_var, intercepts_lb, intercepts_ub = None,None,None,None;
             rates_ave, rates_var, rates_lb, rates_ub = None, None, None, None;
             if None not in slopes:
                 slopes_ave, slopes_var, slopes_lb, slopes_ub = calc.calculate_ave_var(slopes);
             if None not in intercepts:
                 intercepts_ave, intercepts_var, intercepts_lb, intercepts_ub = calc.calculate_ave_var(intercepts);
             if None not in rates:
                 rates_ave, rates_var, rates_lb, rates_ub = calc.calculate_ave_var(rates);
             #add rows to the data base
             row = {};
             row = {'experiment_id':experiment_id_I,
                 'sample_name_abbreviation':sna,
                 'met_id':met,
                 'n':n,
                 'slope_average':slopes_ave,
                 'intercept_average':intercepts_ave,
                 'rate_average':rates_ave,
                 'rate_var':rates_var,
                 'rate_lb':rates_lb,
                 'rate_ub':rates_ub,
                 'rate_units':rates_units[0] if rates_units else None,
                 'used_':True,
                 'comment_':None,};
             data_O.append(row);
     #add data to the DB
     self.add_rows_table('data_stage01_physiology_ratesAverages',data_O);
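# calc.calculate_ave_var is used throughout this module to return
# (average, variance, lower bound, upper bound); a sketch consistent with
# those call sites, assuming a two-sided t interval on the mean (the actual
# calculate_interface implementation may differ):
import numpy
from scipy import stats

def calculate_ave_var(data, confidence_I=0.95):
    data = numpy.asarray(data, dtype=float)
    n = len(data)  # callers above skip n < 2
    ave = numpy.mean(data)
    var = numpy.var(data, ddof=1)  # sample variance
    h = stats.t.ppf((1.0 + confidence_I)/2.0, n - 1)*numpy.sqrt(var/n)
    return ave, var, ave - h, ave + h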
    def calculate_coverageStats_fromGff(self,gff_file, 
         strand_start,strand_stop,scale_factor=True,downsample_factor=2000,
         experiment_id_I=None, sample_name_I=None):
        """extract coverage (genome position and reads) from .gff
        INPUT:
        strand_start = index of the start position
        strand_stop = index of the stop position
        scale_factor = boolean, if true, reads will be normalized to have 100 max
        downsample_factor = integer, factor to downsample the points to
     
        OPTIONAL INPUT:
        experiment_id_I = tag for the experiment from which the sample came
        sample_name_I = tag for the sample name
        
        """
        calculate = calculate_interface();

        self.set_gffFile(gff_file);
        filename = self.gff_file;
        experiment_id = experiment_id_I;
        sn = sample_name_I;
        # parse the gff file into pandas dataframes
        self.extract_strandsFromGff(strand_start, strand_stop, scale=scale_factor, downsample=downsample_factor)
        # split into separate data structures based on the destination table
        coverageStats_data = [];
        # plus strand
        # calculate using scipy
        data_ave_O, data_var_O, data_lb_O, data_ub_O = None, None, None, None;
        data_ave_O, data_var_O, data_lb_O, data_ub_O = calculate.calculate_ave_var(self.plus.values,confidence_I = 0.95);
        # calculate the interquartile range
        min_O, max_O, median_O, iq_1_O, iq_3_O = None, None, None, None, None;
        min_O, max_O, median_O, iq_1_O, iq_3_O=calculate.calculate_interquartiles(self.plus.values);
        # record data
        coverageStats_data.append({
            #'analysis_id':analysis_id,
            'experiment_id':experiment_id,
            'sample_name':sn,
            'genome_chromosome':1,
            'genome_strand':'plus',
            'strand_start':strand_start,
            'strand_stop':strand_stop,
            'reads_min':int(min_O),
            'reads_max':int(max_O),
            'reads_lb':data_lb_O,
            'reads_ub':data_ub_O,
            'reads_iq1':iq_1_O,
            'reads_iq3':iq_3_O,
            'reads_median':median_O,
            'reads_mean':data_ave_O,
            'reads_var':data_var_O,
            'reads_n':len(self.plus.values),
            'used_':True,
            'comment_':None});
        # minus strand
        # calculate using scipy
        data_ave_O, data_var_O, data_lb_O, data_ub_O = None, None, None, None;
        data_ave_O, data_var_O, data_lb_O, data_ub_O = calculate.calculate_ave_var(self.minus.values,confidence_I = 0.95);
        # calculate the interquartile range
        min_O, max_O, median_O, iq_1_O, iq_3_O = None, None, None, None, None;
        min_O, max_O, median_O, iq_1_O, iq_3_O=calculate.calculate_interquartiles(self.minus.values);
        # record data
        coverageStats_data.append({
            #'analysis_id':analysis_id,
            'experiment_id':experiment_id,
            'sample_name':sn,
            'genome_chromosome':1,
            'genome_strand':'minus',
            'strand_start':strand_start,
            'strand_stop':strand_stop,
            'reads_min':int(min_O),
            'reads_max':int(max_O),
            'reads_lb':data_lb_O,
            'reads_ub':data_ub_O,
            'reads_iq1':iq_1_O,
            'reads_iq3':iq_3_O,
            'reads_median':median_O,
            'reads_mean':data_ave_O,
            'reads_var':data_var_O,
            'reads_n':len(self.minus.values),
            'used_':True,
            'comment_':None});
        # record the data
        self.coverageStats = coverageStats_data;
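# calculate_interquartiles returns (min, max, median, IQ1, IQ3) as unpacked
# above; a numpy-only sketch (the calculate_interface implementation may
# differ in its interpolation of the percentiles):
import numpy

def calculate_interquartiles(reads):
    reads = numpy.asarray(reads, dtype=float)
    return (reads.min(), reads.max(), numpy.median(reads),
            numpy.percentile(reads, 25), numpy.percentile(reads, 75))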
 def execute_calculateYield(self,experiment_id_I,sample_name_short_I=[],uptake_mets_I=[]):
     '''Calculate the yield from the growth rate and the uptake rates'''
     
     calc = calculate_interface();
     #query sample names
      print('execute calculate yield...')
     if sample_name_short_I:
         sample_name_short = sample_name_short_I;
     else:
         sample_name_short = [];
         sample_name_short = self.get_sampleNameShort_experimentID_dataStage01PhysiologyRates(experiment_id_I)
     for sns in sample_name_short:
         print('calculating yield for sample_name_short ' + sns);
         #query met_ids
         met_ids = [];
         met_ids = self.get_metIDs_experimentIDAndSampleNameShort_dataStage01PhysiologyRates(experiment_id_I,sns);
         # check for biomass
         if 'biomass' not in met_ids:
             print('no growth rate found!');
             continue;
         # get the biomass physiological rates
         slope_biomass, intercept_biomass, r2_biomass, rate_biomass, units_biomass, p_value_biomass, std_err_biomass = None,None,None,None,None,None,None;
         slope_biomass, intercept_biomass, r2_biomass, rate_biomass, units_biomass, p_value_biomass, std_err_biomass = self.get_rateData_experimentIDAndSampleNameShortAndMetID_dataStage01PhysiologyRates(experiment_id_I,sns,'biomass');
         # check for uptake metabolites and get the uptake metabolite rates
         uptake_rates = [];
         uptake_units = [];
         if uptake_mets_I:
             met_ids_nobiomass = [];
             for umet in uptake_mets_I:
                 if umet in met_ids:
                     met_ids_nobiomass.append(umet);
                 else:
                     print('met_id ' + umet + ' was not found!');
             for umet in met_ids_nobiomass:
                 slope_umet, intercept_umet, r2_umet, rate_umet, units_umet, p_value_umet, std_err_umet = None,None,None,None,None,None,None;
                 slope_umet, intercept_umet, r2_umet, rate_umet, units_umet, p_value_umet, std_err_umet = self.get_rateData_experimentIDAndSampleNameShortAndMetID_dataStage01PhysiologyRates(experiment_id_I,sns,umet);
                 if rate_umet is not None and rate_umet < 0.0:
                     uptake_rates.append(abs(rate_umet));
                     uptake_units.append(units_umet);
         else:
             met_ids_nobiomass = [x for x in met_ids if x != 'biomass'];
             for umet in met_ids_nobiomass:
                 slope_umet, intercept_umet, r2_umet, rate_umet, units_umet, p_value_umet, std_err_umet = None,None,None,None,None,None,None;
                 slope_umet, intercept_umet, r2_umet, rate_umet, units_umet, p_value_umet, std_err_umet = self.get_rateData_experimentIDAndSampleNameShortAndMetID_dataStage01PhysiologyRates(experiment_id_I,sns,umet);
                 if rate_umet is not None and rate_umet < 0.0:
                     uptake_rates.append(abs(rate_umet));
                     uptake_units.append(units_umet);
         if not uptake_rates:
             print('no uptake metabolites found!');
             continue;
         # calculate the yield
         yield_ss = None;
         yield_ss_units = None;
         yield_ss,yield_ss_units = calc.calculate_yield_growthRateAndUptakeRates(rate_biomass,uptake_rates);
         yield_ss_units = 'gDCW*mmol-1 of glc-D'; # hard-coded value that needs to be updated
         #add rows to the data base
         row = None;
         row = data_stage01_physiology_rates(experiment_id_I, sns, 'yield_ss',
                 None, None, None, yield_ss, yield_ss_units,
                 None, None,
                 True, None);
         self.session.add(row);
     self.session.commit();
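# A hedged sketch of the yield calculation: with the growth rate in hr-1 and
# the uptake rates in mmol*gDCW-1*hr-1, the steady-state yield is the growth
# rate divided by the summed uptake rate, giving gDCW*mmol-1; the actual
# calculate_yield_growthRateAndUptakeRates (which also returns the units)
# may differ.
def calculate_yield(rate_biomass, uptake_rates):
    total_uptake = sum(uptake_rates)  # uptake rates are stored as absolute values
    if total_uptake == 0.0:
        return None
    return rate_biomass/total_uptake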
    def export_dataStage01NormalizedAndAverages_checkCVAndExtracelluar_js(self,experiment_id_I,sample_name_abbreviations_I=[],sample_names_I=[],component_names_I=[],
                                                   cv_threshold_I=40,extracellular_threshold_I=80,
                                                   data_dir_I='tmp'):
        '''export data_stage01_quantification_normalized and averages for visualization with ddt'''

        calc = calculate_interface();
        
        print('export_dataStage01NormalizedAndAverages_checkCVAndExtracelluar_js...')
        data_norm_broth = [];
        data_norm_filtrate = [];
        data_norm_combined = [];
        data_ave = [];
        # get sample_name_abbreviations
        if sample_name_abbreviations_I:
            sample_name_abbreviations = sample_name_abbreviations_I
        else:
            sample_name_abbreviations = [];
            sample_name_abbreviations = self.get_sampleNameAbbreviations_experimentID_dataStage01Normalized(experiment_id_I);
        # create database table
        for sna in sample_name_abbreviations:
            print('exporting sample_name_abbreviation ' + sna);
            # get component names
            if component_names_I:
                component_names = component_names_I
            else:
                component_names = [];
                component_names = self.get_componentsNames_experimentIDAndSampleNameAbbreviation_dataStage01Normalized(experiment_id_I,sna);
            for cn in component_names:
                print('exporting component_name ' + cn);
                component_group_name = self.get_componentGroupName_experimentIDAndComponentName_dataStage01Normalized(experiment_id_I,cn);
                # get time points
                time_points = self.get_timePoint_experimentIDAndSampleNameAbbreviation_dataStage01Normalized(experiment_id_I,sna);
                for tp in time_points:
                    print('exporting time_point ' + tp);
                    # get the averages and %CV samples
                    row = {};
                    #row = self.get_row_experimentIDAndSampleNameAbbreviationAndTimePointAndComponentName_dataStage01Averages(experiment_id_I,sna,tp,cn);
                    row = self.get_row_experimentIDAndSampleNameAbbreviationAndTimePointAndComponentNameAndCalculatedConcentrationCVAndExtracellularPercent_dataStage01Averages(experiment_id_I,
                                                                        sna,tp,cn,
                                                                        cv_threshold_I=cv_threshold_I,
                                                                        extracellular_threshold_I=extracellular_threshold_I);
                    if not row: continue;
                    stdev = calc.convert_cv2StDev(row['calculated_concentration_filtrate_average'],row['calculated_concentration_filtrate_cv']);
                    row['calculated_concentration_filtrate_lb'] = row['calculated_concentration_filtrate_average']-stdev;
                    row['calculated_concentration_filtrate_ub'] = row['calculated_concentration_filtrate_average']+stdev;
                    stdev = calc.convert_cv2StDev(row['calculated_concentration_broth_average'],row['calculated_concentration_broth_cv']);
                    row['calculated_concentration_broth_lb'] = row['calculated_concentration_broth_average']-stdev;
                    row['calculated_concentration_broth_ub'] = row['calculated_concentration_broth_average']+stdev;
                    stdev = calc.convert_cv2StDev(row['calculated_concentration_average'],row['calculated_concentration_cv']);
                    row['calculated_concentration_lb'] = row['calculated_concentration_average']-stdev;
                    row['calculated_concentration_ub'] = row['calculated_concentration_average']+stdev;
                    data_ave.append(row);
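                    # NOTE: the lb/ub bounds above are average +/- one
                    # standard deviation recovered from the stored %CV;
                    # convert_cv2StDev presumably inverts cv = stdev/average*100,
                    # i.e. stdev = cv/100.0*average.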
                    # get filtrate sample names
                    sample_names = [];
                    sample_description = 'Filtrate';
                    sample_names = self.get_sampleNames_experimentIDAndSampleNameAbbreviationAndSampleDescriptionAndComponentNameAndTimePoint_dataStage01Normalized(experiment_id_I,sna,sample_description,cn,tp);
                    if sample_names_I: # screen out sample names that are not in the input
                        sample_names = [x for x in sample_names if x in sample_names_I];
                    for sn in sample_names:
                        # get the row
                        row = None;
                        row = self.get_row_sampleNameAndComponentName_dataStage01Normalized(sn,cn);
                        if not(row): continue;
                        row['sample_name_abbreviation'] = sna;
                        data_norm_filtrate.append(row);
                        data_norm_combined.append(row);
                    # get broth sample names
                    sample_names = [];
                    sample_description = 'Broth';
                    sample_names = self.get_sampleNames_experimentIDAndSampleNameAbbreviationAndSampleDescriptionAndComponentNameAndTimePoint_dataStage01Normalized(experiment_id_I,sna,sample_description,cn,tp);
                    if sample_names_I: # screen out sample names that are not in the input
                        sample_names = [x for x in sample_names if x in sample_names_I];
                    for sn in sample_names:
                        # get the row
                        row = None;
                        row = self.get_row_sampleNameAndComponentName_dataStage01Normalized(sn,cn);
                        if not(row): continue;
                        row['sample_name_abbreviation'] = sna;
                        data_norm_broth.append(row);
                        data_norm_combined.append(row);
        # dump chart parameters to a js files
        data1_keys = ['experiment_id',
                      'sample_name',
                      'sample_id',
                      'sample_name_abbreviation',
                      'component_group_name',
                      'component_name',
                      'calculated_concentration_units'
                    ];
        data1_nestkeys = ['component_name'];
        data1_keymap = {'xdata':'component_name',
                        'ydatamean':'calculated_concentration',
                        #'ydatalb':'peakInfo_lb',
                        #'ydataub':'peakInfo_ub',
                        #'ydatamin':None,
                        #'ydatamax':None,
                        #'ydataiq1':None,
                        #'ydataiq3':None,
                        #'ydatamedian':None,
                        'serieslabel':'sample_name',
                        'featureslabel':'component_name'};
        data2_keys = ['experiment_id',
                      'sample_name_abbreviation',
                      'time_point',
                      'component_group_name',
                      'component_name',
                      'calculated_concentration_units',
                      'extracellular_percent',
                      'calculated_concentration_broth_cv'
                    ];
        data2_nestkeys = ['component_name'];
        data2_keymap = {'xdata':'component_name',
                        'ydatamean':'calculated_concentration_broth_average',
                        'ydatalb':'calculated_concentration_broth_lb',
                        'ydataub':'calculated_concentration_broth_ub',
                        #'ydatamin':None,
                        #'ydatamax':None,
                        #'ydataiq1':None,
                        #'ydataiq3':None,
                        #'ydatamedian':None,
                        'serieslabel':'sample_name_abbreviation',
                        'featureslabel':'component_name'};
        data3_keys = ['experiment_id',
                      'sample_name_abbreviation',
                      'time_point',
                      'component_group_name',
                      'component_name',
                      'calculated_concentration_units',
                      'extracellular_percent',
                      'calculated_concentration_filtrate_cv'
                    ];
        data3_nestkeys = ['component_name'];
        data3_keymap = {'xdata':'component_name',
                        'ydatamean':'calculated_concentration_filtrate_average',
                        'ydatalb':'calculated_concentration_filtrate_lb',
                        'ydataub':'calculated_concentration_filtrate_ub',
                        #'ydatamin':None,
                        #'ydatamax':None,
                        #'ydataiq1':None,
                        #'ydataiq3':None,
                        #'ydatamedian':None,
                        'serieslabel':'sample_name_abbreviation',
                        'featureslabel':'component_name'};
        data4_keys = ['experiment_id',
                      'sample_name_abbreviation',
                      'time_point',
                      'component_group_name',
                      'component_name',
                      'calculated_concentration_units',
                      'extracellular_percent',
                      'calculated_concentration_cv'
                    ];
        data4_nestkeys = ['component_name'];
        data4_keymap = {'xdata':'component_name',
                        'ydatamean':'calculated_concentration_average',
                        'ydatalb':'calculated_concentration_lb',
                        'ydataub':'calculated_concentration_ub',
                        #'ydatamin':None,
                        #'ydatamax':None,
                        #'ydataiq1':None,
                        #'ydataiq3':None,
                        #'ydatamedian':None,
                        'serieslabel':'sample_name_abbreviation',
                        'featureslabel':'component_name'};
        # make the data object
        dataobject_O = [{"data":data_norm_broth,"datakeys":data1_keys,"datanestkeys":data1_nestkeys},
                        {"data":data_norm_filtrate,"datakeys":data1_keys,"datanestkeys":data1_nestkeys},
                        {"data":data_norm_combined,"datakeys":data1_keys,"datanestkeys":data1_nestkeys},
                        {"data":data_ave,"datakeys":data2_keys,"datanestkeys":data2_nestkeys},
                        {"data":data_ave,"datakeys":data3_keys,"datanestkeys":data3_nestkeys},
                        {"data":data_ave,"datakeys":data4_keys,"datanestkeys":data4_nestkeys}];
        # make the tile parameter objects for the normalized and averages
        formtileparameters_normalized_O = {'tileheader':'Filter menu normalized','tiletype':'html','tileid':"filtermenu1",'rowid':"row1",'colid':"col1",
            'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-6"};
        formparameters_normalized_O = {'htmlid':'filtermenuform1',"htmltype":'form_01',"formsubmitbuttonidtext":{'id':'submit1','text':'submit'},"formresetbuttonidtext":{'id':'reset1','text':'reset'},"formupdatebuttonidtext":{'id':'update1','text':'update'}};
        formtileparameters_normalized_O.update(formparameters_normalized_O);
        formtileparameters_averages_O = {'tileheader':'Filter menu averages','tiletype':'html','tileid':"filtermenu2",'rowid':"row1",'colid':"col2",
            'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-6"};
        formparameters_averages_O = {'htmlid':'filtermenuform2',"htmltype":'form_01',"formsubmitbuttonidtext":{'id':'submit2','text':'submit'},"formresetbuttonidtext":{'id':'reset2','text':'reset'},"formupdatebuttonidtext":{'id':'update2','text':'update'}};
        formtileparameters_averages_O.update(formparameters_averages_O);
        # make the svg objects for the normalized data
        svgparameters_broth_O = {"svgtype":'boxandwhiskersplot2d_02',"svgkeymap":[data1_keymap],
                            'svgid':'svg1',
                            "svgmargin":{ 'top': 50, 'right': 150, 'bottom': 50, 'left': 50 },
                            "svgwidth":250,"svgheight":250,
                            "svgx1axislabel":"component_name","svgy1axislabel":"concentration",
    						'svgformtileid':'filtermenu1','svgresetbuttonid':'reset1','svgsubmitbuttonid':'submit1'};
        svgtileparameters_broth_O = {'tileheader':'Broth data','tiletype':'svg','tileid':"tile1",'rowid':"row2",'colid':"col1",
            'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-4"};
        svgtileparameters_broth_O.update(svgparameters_broth_O);
        svgparameters_filtrate_O = {"svgtype":'boxandwhiskersplot2d_02',"svgkeymap":[data1_keymap],
                            'svgid':'svg2',
                            "svgmargin":{ 'top': 50, 'right': 150, 'bottom': 50, 'left': 50 },
                            "svgwidth":250,"svgheight":250,
                            "svgx1axislabel":"component_name","svgy1axislabel":"concentration",
    						'svgformtileid':'filtermenu1','svgresetbuttonid':'reset1','svgsubmitbuttonid':'submit1'};
        svgtileparameters_filtrate_O = {'tileheader':'Filtrate data','tiletype':'svg','tileid':"tile2",'rowid':"row2",'colid':"col2",
            'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-4"};
        svgtileparameters_filtrate_O.update(svgparameters_filtrate_O);
        svgparameters_combined_O = {"svgtype":'boxandwhiskersplot2d_02',"svgkeymap":[data1_keymap],
                            'svgid':'svg3',
                            "svgmargin":{ 'top': 50, 'right': 150, 'bottom': 50, 'left': 50 },
                            "svgwidth":250,"svgheight":250,
                            "svgx1axislabel":"component_name","svgy1axislabel":"concentration",
    						'svgformtileid':'filtermenu1','svgresetbuttonid':'reset1','svgsubmitbuttonid':'submit1'};
        svgtileparameters_combined_O = {'tileheader':'Broth-Filtrate data','tiletype':'svg','tileid':"tile3",'rowid':"row2",'colid':"col3",
            'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-4"};
        svgtileparameters_combined_O.update(svgparameters_combined_O);
        # make the svg objects for the averages data
        svgparameters_averages_broth_O = {"svgtype":'boxandwhiskersplot2d_02',"svgkeymap":[data2_keymap],
                            'svgid':'svg4',
                            "svgmargin":{ 'top': 50, 'right': 150, 'bottom': 50, 'left': 50 },
                            "svgwidth":250,"svgheight":250,
                            "svgx1axislabel":"component_name","svgy1axislabel":"concentration",
    						'svgformtileid':'filtermenu2','svgresetbuttonid':'reset2','svgsubmitbuttonid':'submit2'};
        svgtileparameters_averages_broth_O = {'tileheader':'Broth data','tiletype':'svg','tileid':"tile4",'rowid':"row3",'colid':"col1",
            'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-4"};
        svgtileparameters_averages_broth_O.update(svgparameters_averages_broth_O);
        svgparameters_averages_filtrate_O = {"svgtype":'boxandwhiskersplot2d_02',"svgkeymap":[data3_keymap],
                            'svgid':'svg5',
                            "svgmargin":{ 'top': 50, 'right': 150, 'bottom': 50, 'left': 50 },
                            "svgwidth":250,"svgheight":250,
                            "svgx1axislabel":"component_name","svgy1axislabel":"concentration",
    						'svgformtileid':'filtermenu2','svgresetbuttonid':'reset2','svgsubmitbuttonid':'submit2'};
        svgtileparameters_averages_filtrate_O = {'tileheader':'Filtrate data','tiletype':'svg','tileid':"tile5",'rowid':"row3",'colid':"col2",
            'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-4"};
        svgtileparameters_averages_filtrate_O.update(svgparameters_averages_filtrate_O);
        svgparameters_averages_combined_O = {"svgtype":'boxandwhiskersplot2d_02',"svgkeymap":[data4_keymap],
                            'svgid':'svg6',
                            "svgmargin":{ 'top': 50, 'right': 150, 'bottom': 50, 'left': 50 },
                            "svgwidth":250,"svgheight":250,
                            "svgx1axislabel":"component_name","svgy1axislabel":"concentration",
    						'svgformtileid':'filtermenu2','svgresetbuttonid':'reset2','svgsubmitbuttonid':'submit2'};
        svgtileparameters_averages_combined_O = {'tileheader':'Broth-Filtrate data','tiletype':'svg','tileid':"tile6",'rowid':"row3",'colid':"col3",
            'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-4"};
        svgtileparameters_averages_combined_O.update(svgparameters_averages_combined_O);
        # make the tables for the normalized and averages data
        tableparameters_normalized_O = {"tabletype":'responsivetable_01',
                    'tableid':'table1',
                    "tablefilters":None,
                    "tableclass":"table  table-condensed table-hover",
    			    'tableformtileid':'filtermenu1','tableresetbuttonid':'reset1','tablesubmitbuttonid':'submit1'};
        tabletileparameters_normalized_O = {'tileheader':'normalized data','tiletype':'table','tileid':"tile7",'rowid':"row4",'colid':"col1",
            'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-12"};
        tabletileparameters_normalized_O.update(tableparameters_normalized_O);
        tableparameters_averages_O = {"tabletype":'responsivetable_01',
                    'tableid':'table2',
                    "tablefilters":None,
                    "tableclass":"table  table-condensed table-hover",
    			    'tableformtileid':'filtermenu2','tableresetbuttonid':'reset2','tablesubmitbuttonid':'submit2'};
        tabletileparameters_averages_O = {'tileheader':'averages data','tiletype':'table','tileid':"tile8",'rowid':"row5",'colid':"col1",
            'tileclass':"panel panel-default",'rowclass':"row",'colclass':"col-sm-12"};
        tabletileparameters_averages_O.update(tableparameters_averages_O);
        parametersobject_O = [formtileparameters_normalized_O,
                              formtileparameters_averages_O,
                              svgtileparameters_broth_O,
                              svgtileparameters_filtrate_O,
                              svgtileparameters_combined_O,
                              svgtileparameters_averages_broth_O,
                              svgtileparameters_averages_filtrate_O,
                              svgtileparameters_averages_combined_O,
                              tabletileparameters_normalized_O,
                              tabletileparameters_averages_O];
        tile2datamap_O = {"filtermenu1":[2],"filtermenu2":[5],
                          "tile1":[0],"tile2":[1],"tile3":[2],
                          "tile4":[3],"tile5":[4],"tile6":[5],
                          "tile7":[2],"tile8":[5]};
        filtermenuobject_O = [{"filtermenuid":"filtermenu1","filtermenuhtmlid":"filtermenuform1",
                "filtermenusubmitbuttonid":"submit1","filtermenuresetbuttonid":"reset1",
                "filtermenuupdatebuttonid":"update1"},{"filtermenuid":"filtermenu2","filtermenuhtmlid":"filtermenuform2",
                "filtermenusubmitbuttonid":"submit2","filtermenuresetbuttonid":"reset2",
                "filtermenuupdatebuttonid":"update2"}
                              ];
        # dump the data and chart parameters to a js file for visualization with ddt
        ddtutilities = ddt_container(parameters_I = parametersobject_O,data_I = dataobject_O,tile2datamap_I = tile2datamap_O,filtermenu_I = filtermenuobject_O);
        if data_dir_I=='tmp':
            filename_str = self.settings['visualization_data'] + '/tmp/ddt_data.js';
            with open(filename_str,'w') as file:
                file.write(ddtutilities.get_allObjects());
        elif data_dir_I=='data_json':
            data_json_O = ddtutilities.get_allObjects_js();
            return data_json_O;
    def execute_calculateGeoAverages_replicates_v1(
        self,
        experiment_id_I,
        sample_name_abbreviations_I=[],
        ):
        '''Calculate the averages from replicates MI in ln space'''

        calc = calculate_interface();

        print('execute_calculateGeoAverages_replicates...')
        data_O = [];
        # get sample_name_abbreviations
        if sample_name_abbreviations_I:
            sample_name_abbreviations = sample_name_abbreviations_I;
        else:
            sample_name_abbreviations = [];
            sample_name_abbreviations = self.get_sampleNameAbbreviations_experimentID_dataStage01ReplicatesMI(experiment_id_I);
        for sna in sample_name_abbreviations:
            print('calculating the geometric average from replicates for sample_name_abbreviation ' + sna);
            # get component names
            component_names = [];
            component_names = self.get_componentNames_experimentIDAndSampleNameAbbreviation_dataStage01ReplicatesMI(experiment_id_I,sna);
            # get time points
            time_points = self.get_timePoint_experimentIDAndSampleNameAbbreviation_dataStage01ReplicatesMI(experiment_id_I,sna);
            for cn in component_names:
                print('calculating the geometric average from replicates for component_name ' + cn);
                component_group_name = self.get_componentGroupName_experimentIDAndComponentName_dataStage01Normalized(experiment_id_I,cn);
                for tp in time_points:
                    print('calculating the geometric average from replicates for time_point ' + tp);
                    # get sample names short
                    sample_names_short = [];
                    sample_names_short = self.get_sampleNameShort_experimentIDAndSampleNameAbbreviationAndComponentNameAndTimePoint_dataStage01ReplicatesMI(experiment_id_I,sna,cn,tp);
                    concs = [];
                    conc_units = None;
                    for sns in sample_names_short:
                        # concentrations and units
                        conc = None;
                        conc_unit = None;
                        conc, conc_unit = self.get_concAndConcUnits_experimentIDAndSampleNameShortAndTimePointAndComponentName_dataStage01ReplicatesMI(experiment_id_I,sns,tp,cn);
                        if (not(conc) or conc==0): continue;
                        # convert concentrations to M (from mM or uM) or to
                        # mol*gDW-1 (from umol*gDW-1); the geometric average
                        # is computed in ln space downstream
                        if (conc_unit == 'mM'): 
                            conc_units = 'M'; 
                            conc = conc*1e-3;
                        elif (conc_unit == 'uM'):
                            conc_units = 'M'; 
                            conc = conc*1e-6;
                        elif (conc_unit == 'umol*gDW-1'):
                            conc_units = 'mol*gDW-1';
                            conc = conc*1e-6;
                        elif (conc_unit == 'height_ratio' or conc_unit == 'area_ratio'):
                            continue;
                        else:
                            print('units of ' + str(conc_unit) + ' are not supported');
                            exit(-1);
                        concs.append(conc);
                    n_replicates = len(concs);
                    conc_average = 0.0;
                    conc_var = 0.0;
                    conc_lb = 0.0;
                    conc_ub = 0.0;
                    # calculate average and CV of concentrations
                    if (not(concs)): 
                        continue
                    elif n_replicates<2: 
                        continue
                    else: 
                        conc_average, conc_var, conc_lb, conc_ub = calc.calculate_ave_var_geometric(concs);

                    # add data to the session
                    row = {"experiment_id":experiment_id_I, 
                        "sample_name_abbreviation":sna, 
                        "time_point":tp, 
                        "component_group_name":component_group_name, 
                        "component_name":cn,
                        "n_replicates":n_replicates, 
                        "calculated_concentration_average":conc_average, 
                        "calculated_concentration_var":conc_var, 
                        "calculated_concentration_lb":conc_lb, 
                        "calculated_concentration_ub":conc_ub, 
                        "calculated_concentration_units":conc_units, 
                        "used_":True
                        };   
                    data_O.append(row);
        self.add_rows_table('data_stage01_quantification_averagesMIgeo',data_O)
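# calc.calculate_ave_var_geometric returns (ave, var, lb, ub) computed in ln
# space; a sketch under the usual geometric-statistics convention (take the
# statistics of the log-transformed data, then back-transform), which the
# actual calculate_interface implementation may refine:
import numpy
from scipy import stats

def calculate_ave_var_geometric(data, confidence_I=0.95):
    ln_data = numpy.log(numpy.asarray(data, dtype=float))
    n = len(ln_data)  # callers above skip n < 2
    ln_ave = numpy.mean(ln_data)
    ln_var = numpy.var(ln_data, ddof=1)
    h = stats.t.ppf((1.0 + confidence_I)/2.0, n - 1)*numpy.sqrt(ln_var/n)
    # back-transform: geometric mean and exp-transformed dispersion/bounds
    return (numpy.exp(ln_ave), numpy.exp(ln_var),
            numpy.exp(ln_ave - h), numpy.exp(ln_ave + h))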
    def execute_analyzeQCs(self,experiment_id_I,sample_types_I=['QC']):
        '''calculate the average and coefficient of variation for QCs

        NOTE: analytical replicates are those samples with the same
        sample_id (but different sample_name)

        INPUT:
        experiment_id

        OUTPUT:
        sample_name
        component_group_name
        component_name
        n_replicates
        conc_average
        conc_CV
        conc_units
        '''
        calc = calculate_interface();
        
        print('execute_analyzeQCs...')
        # get sample name abbreviations
        sample_name_abbreviations = [];
        data_O = [];
        for st in sample_types_I:
            sample_name_abbreviations_tmp = [];
            sample_name_abbreviations_tmp = self.get_sampleNameAbbreviations_experimentIDAndSampleType(experiment_id_I,st);
            sample_name_abbreviations.extend(sample_name_abbreviations_tmp);
        # create database table
        for sna in sample_name_abbreviations:
            # get dilutions
            sample_dilutions = [];
            sample_dilutions = self.get_sampleDilution_experimentIDAndSampleNameAbbreviation(experiment_id_I,sna);
            # get component names
            component_names = [];
            component_names = self.get_componentsNames_experimentIDAndSampleNameAbbreviation(experiment_id_I,sna);
            for cn in component_names:
                component_group_name = self.get_componentGroupName_experimentIDAndComponentName(experiment_id_I,cn);
                for sd in sample_dilutions:
                    # get sample names
                    sample_names = [];
                    sample_names = self.get_sampleNames_experimentIDAndSampleNameAbbreviationAndSampleDilution(experiment_id_I,sna,sd);
                    if len(sample_names)<2: continue;
                    concs = [];
                    conc_units = None;
                    for sn in sample_names:
                        # concentrations and units
                        conc = None;
                        conc_unit = None;
                        conc, conc_unit = self.get_concAndConcUnits_sampleNameAndComponentName(sn,cn);
                        if not(conc): continue
                        if (conc_unit): conc_units = conc_unit;
                        concs.append(conc);
                    n_replicates = len(concs);
                    # calculate average and CV of concentrations
                    if (not(concs) or n_replicates<2): continue
                    conc_average, data_var_O, conc_CV, data_lb_O, data_ub_O = calc.calculate_ave_var_cv(concs);
                    data_O.append({'experiment_id':experiment_id_I,
                        'sample_name_abbreviation':sna,
                        'sample_dilution':sd,
                        'component_group_name':component_group_name,
                        'component_name':cn,
                        'n_replicates':n_replicates,
                        'calculated_concentration_average':conc_average,
                        'calculated_concentration_CV':conc_CV,
                        'calculated_concentration_units':conc_units});
        self.add_dataStage01_quantification_QCs(data_O);
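# calc.calculate_ave_var_cv above returns (ave, var, cv, lb, ub); it extends
# the calculate_ave_var sketch shown earlier with the coefficient of
# variation, presumably cv = sqrt(var)/ave*100 (reported as 0 when ave is 0).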
 def execute_physiologicalRatios_averages(self,experiment_id_I):
     '''Calculate physiologicalRatios_averages from physiologicalRatios_replicates'''
     calc = calculate_interface();
     
     print('calculate_physiologicalRatios_averages...')
     data_O = [];
     # get sample_name_abbreviations
     sample_name_abbreviations = [];
     sample_name_abbreviations = self.get_sampleNameAbbreviations_experimentID_dataStage01PhysiologicalRatiosReplicates(experiment_id_I);
     for sna in sample_name_abbreviations:
         print('calculating physiologicalRatios from replicates for sample_name_abbreviation ' + sna);
         # get time points
         time_points = [];
         time_points = self.get_timePoint_experimentIDAndSampleNameAbbreviation_dataStage01PhysiologicalRatiosReplicates(experiment_id_I,sna);
         for tp in time_points:
             print('calculating physiologicalRatios from replicates for time_point ' + tp);
             # get ratio information
             ratio_info = {};
             ratio_info = self.get_ratioIDs_experimentIDAndTimePoint_dataStage01PhysiologicalRatiosReplicates(experiment_id_I,tp)
             for k,v in ratio_info.items():
                 print('calculating physiologicalRatios from replicates for ratio ' + k);
                 # get sample names short
                 sample_names_short = [];
                 sample_names_short = self.get_sampleNameShort_experimentIDAndSampleNameAbbreviationAndRatioIDAndTimePoint_dataStage01PhysiologicalRatiosReplicates(experiment_id_I,sna,k,tp);
                 ratios = [];
                 for sns in sample_names_short:
                     # get ratios
                     ratio = None;
                     ratio = self.get_ratio_experimentIDAndSampleNameShortAndTimePointAndRatioID_dataStage01PhysiologicalRatiosReplicates(experiment_id_I,sns,tp,k);
                     if not ratio: continue;
                     ratios.append(ratio);
                 n_replicates = len(ratios);
                 ratio_average = 0.0;
                 ratio_var = 0.0;
                 ratio_cv = 0.0;
                 ratio_lb = 0.0;
                 ratio_ub = 0.0;
                 # calculate average and CV of ratios
                 if (not(ratios)): 
                     continue
                 elif n_replicates<2: 
                     continue
                 else: 
                     ratio_average,ratio_var,ratio_lb,ratio_ub = calc.calculate_ave_var(ratios);
                     if (ratio_average <= 0): ratio_cv = 0;
                     else: ratio_cv = sqrt(ratio_var)/ratio_average*100; 
                 # add data to the session
                 row = {
                     "experiment_id":experiment_id_I, 
                     "sample_name_abbreviation":sna,
                     "time_point":tp,
                     "physiologicalratio_id":k,
                     "physiologicalratio_name":v['name'],
                     "physiologicalratio_value_ave":ratio_average,
                     "physiologicalratio_value_cv":ratio_cv,
                     "physiologicalratio_value_lb":ratio_lb,
                     "physiologicalratio_value_ub":ratio_ub,
                     "physiologicalratio_description":v['description'],
                     "used_":True,
                     "comment_":None
                     };   
                 data_O.append(row);                        
     self.add_rows_table('data_stage01_quantification_physiologicalRatios_averages',data_O);