def execute_calculateMissingValues_replicates(self,experiment_id_I,sample_name_abbreviations_I=[],r_calc_I=None):
        '''Calculate estimates for missing replicate values using AmeliaII from R.
        INPUT:
        experiment_id_I = experiment to process
        sample_name_abbreviations_I = optional subset of sample name abbreviations;
            when empty, all abbreviations for the experiment are queried
        r_calc_I = optional pre-initialized R interface (reused to avoid reloading R)
        OUTPUT:
        adds data_stage01_quantification_replicatesMI rows to the session and
        commits once per sample name abbreviation'''

        # reuse a caller-supplied R interface when provided
        if r_calc_I: r_calc = r_calc_I;
        else: r_calc = r_interface();

        print('execute_calculateMissingValues_replicates...')
        # get sample name abbreviations
        if sample_name_abbreviations_I:
            sample_names_abbreviation = sample_name_abbreviations_I;
        else:
            sample_names_abbreviation = self.get_sampleNameAbbreviations_experimentID_dataStage01Replicates(experiment_id_I);
        # for each sample name abbreviation
        for sna in sample_names_abbreviation:
            print('calculating missing values for sample_name_abbreviation ' + sna);
            # get time points
            time_points = self.get_timePoint_experimentIDAndSampleNameAbbreviation_dataStage01Replicates(experiment_id_I,sna);
            for tp in time_points:
                print('calculating missing values for time_point ' + tp);
                # get sample names short
                sample_names_short = self.get_SampleNameShort_experimentIDAndSampleNameAbbreviationAndTimePoint_dataStage01Replicates(experiment_id_I,sna,tp);
                # collect the full data set for this (abbreviation, time point) group
                data = [];
                for sns in sample_names_short:
                    # BUGFIX: message previously said "sample_name_abbreviation"
                    # while printing the sample_name_short
                    print('calculating missing values for sample_name_short ' + sns);
                    # get sample names short, component names, and concentrations
                    data_tmp = self.get_data_experimentIDAndSampleNameShortAndTimePoint_dataStage01Replicates(experiment_id_I,sns,tp);
                    data.extend(data_tmp);
                # compute missing values via AmeliaII (multiple imputation)
                sns_NA, cn_NA, cc_NA = r_calc.calculate_missingValues(data);
                for n in range(len(sns_NA)):
                    # look up the component group name and units for the imputed value
                    component_group_name, calculated_concentration_units = self.get_componentGroupNameAndConcUnits_experimentIDAndComponentNameAndSampleNameAbbreviationAndTimePoint_dataStage01Replicates(experiment_id_I,cn_NA[n],sna,tp);
                    # update data_stage01_quantification_replicatesMI
                    row = data_stage01_quantification_replicatesMI(experiment_id_I,sns_NA[n],tp,component_group_name,cn_NA[n],"AmeliaII",None,cc_NA[n],calculated_concentration_units,True,None);
                    self.session.add(row);
            # commit once per sample_name_abbreviation
            self.session.commit(); 
    def execute_calculateMissingValues_ameliaII(self,
            analysis_id_I,
            imputation_method_I = 'ameliaII',
            imputation_options_I = None,
            calculated_concentration_units_I=[],
            experiment_ids_I=[],
            sample_name_abbreviations_I=[],
            time_points_I=[],
            r_calc_I=None):
        '''Calculate estimates for missing replicate values using AmeliaII from R.
        INPUT:
        analysis_id_I = analysis to process
        imputation_method_I = label recorded for the imputation method
        imputation_options_I = dict of imputation options
            (default: {'n_imputations':1000})
        calculated_concentration_units_I/experiment_ids_I/
        sample_name_abbreviations_I/time_points_I = optional filters on the
            unique groups that are processed
        r_calc_I = optional pre-initialized R interface (reused to avoid reloading R)
        OUTPUT:
        imputed rows are written to data_preProcessing_replicates and the
        imputation settings to data_preProcessing_replicates_imputationAndNormalizations'''

        # BUGFIX: avoid a mutable dict default argument (shared across calls);
        # the effective default is unchanged.
        if imputation_options_I is None:
            imputation_options_I = {'n_imputations':1000};

        # reuse a caller-supplied R interface when provided
        if r_calc_I: r_calc = r_calc_I;
        else: r_calc = r_interface();

        print('execute_calculateMissingValues_ameliaII...')
        data_O = [];
        data_imputations = [];
        # get the calculated_concentration_units/experiment_ids/sample_name_abbreviations/time_points that are unique
        # BUGFIX: the query helpers were called as bare (undefined) names;
        # they are methods on self (cf. execute_normalization_dataSet).
        unique_groups = self.get_calculatedConcentrationUnitsAndExperimentIDsAndSampleNameAbbreviationsAndTimePoints_analysisID_dataPreProcessingReplicates(
            analysis_id_I,
            calculated_concentration_units_I=calculated_concentration_units_I,
            experiment_ids_I=experiment_ids_I,
            sample_name_abbreviations_I=sample_name_abbreviations_I,
            time_points_I=time_points_I,
            );
        for group in unique_groups:
            # fetch the data set for this unique group
            data = self.get_rows_analysisIDAndCalculatedConcentrationUnitsAndExperimentIDsAndSampleNameAbbreviationsAndTimePoints_dataPreProcessingReplicates(
                analysis_id_I,
                group['calculated_concentration_units'],
                group['experiment_id'],
                group['sample_name_abbreviation'],
                group['time_point'],
                );
            # compute missing values via AmeliaII (multiple imputation)
            sns_NA, cn_NA, cc_NA = r_calc.calculate_missingValues(
                data,
                imputation_options_I['n_imputations']
                );
            for n in range(len(sns_NA)):
                component_group_name = None;
                # BUGFIX: the imputed row previously referenced undefined names
                # (experiment_id_I, tp, calculated_concentration_units),
                # shadowed the loop variable 'row', and was discarded while
                # data_O stayed empty; it is now built from the current group
                # and collected into data_O.
                # NOTE(review): add_rows_table may expect dicts rather than
                # ORM objects — confirm against its other callers.
                imputed_row = data_stage01_quantification_replicatesMI(
                    group['experiment_id'],
                    sns_NA[n],
                    group['time_point'],
                    component_group_name,
                    cn_NA[n],
                    "AmeliaII",
                    None,
                    cc_NA[n],
                    group['calculated_concentration_units'],
                    True,
                    None);
                data_O.append(imputed_row);
            # record data imputation method
            tmp = {
                "analysis_id":analysis_id_I,
                "imputation_method":imputation_method_I,
                "imputation_options":imputation_options_I,
                "normalization_method":None,
                "normalization_options":None,
                # BUGFIX: 'cu' was undefined here; use the group's units
                'calculated_concentration_units':group['calculated_concentration_units'],
                "used_":True,
                'comment_I':None
                }
            data_imputations.append(tmp);
        self.add_rows_table('data_preProcessing_replicates',data_O);
        self.add_rows_table('data_preProcessing_replicates_imputationAndNormalizations',data_imputations);
# Exemplo n.º 3
# 0  (scrape/pagination artifact from the code-listing source; commented out so the file parses)
##import the analysis
#analysis01.import_rows_table_add_csv(
#    'data_preProcessing_analysis',
#    pg_settings.datadir_settings['workspace_data']+'/_input/160309_RNASequencing_dataPreProcessing_ALEsKOs01_analysis01.csv'
#    );

#make the dataPreProcessing tables
# NOTE(review): `session`, `engine`, and `pg_settings` are defined earlier in
# the original script (not visible here) — confirm they are in scope.
from SBaaS_dataPreProcessing.dataPreProcessing_replicates_execute import dataPreProcessing_replicates_execute
dpprep01 = dataPreProcessing_replicates_execute(session,engine,pg_settings.datadir_settings);
dpprep01.initialize_supportedTables();
#dpprep01.drop_tables();
dpprep01.initialize_tables();

# Load R once
# (starting the R interface is expensive; the single instance is passed to the
# execute_* methods via their r_calc_I parameter)
from r_statistics.r_interface import r_interface
r_calc = r_interface();

#get RNAsequencing data
snsPreProcessing2snRNASequencing_I={
    'OxicEvo04EcoliGlcM9_Broth-4':'140818_0_OxicEvo04EcoliGlcM9_Broth-4',
    'OxicEvo04EcoliGlcM9_Broth-5':'140818_0_OxicEvo04EcoliGlcM9_Broth-5',
    'OxicEvo04Evo01EPEcoliGlcM9_Broth-1':'140815_11_OxicEvo04Evo01EPEcoliGlcM9_Broth-1',
    'OxicEvo04Evo01EPEcoliGlcM9_Broth-2':'140815_11_OxicEvo04Evo01EPEcoliGlcM9_Broth-2',
    'OxicEvo04Evo02EPEcoliGlcM9_Broth-1':'140815_11_OxicEvo04Evo02EPEcoliGlcM9_Broth-1',
    'OxicEvo04Evo02EPEcoliGlcM9_Broth-2':'140815_11_OxicEvo04Evo02EPEcoliGlcM9_Broth-2',
    'OxicEvo04gndEcoliGlcM9_Broth-1':'140715_0_OxicEvo04gndEcoliGlcM9_Broth-1',
    'OxicEvo04gndEcoliGlcM9_Broth-2':'140715_0_OxicEvo04gndEcoliGlcM9_Broth-2',
    'OxicEvo04gndEvo01EPEcoliGlcM9_Broth-1':'140812_11_OxicEvo04gndEvo01EPEcoliGlcM9_Broth-1',
    'OxicEvo04gndEvo01EPEcoliGlcM9_Broth-2':'140812_11_OxicEvo04gndEvo01EPEcoliGlcM9_Broth-2',
    'OxicEvo04gndEvo02EPEcoliGlcM9_Broth-1':'140812_11_OxicEvo04gndEvo02EPEcoliGlcM9_Broth-1',
    'OxicEvo04gndEvo02EPEcoliGlcM9_Broth-2':'140812_11_OxicEvo04gndEvo02EPEcoliGlcM9_Broth-2',
    def execute_normalization_dataSet(self,
            analysis_id_I,
            imputation_methods_I=[],
            normalization_methods_I=[],
            calculated_concentration_units_I=[],
            normalization_method_I='gLog',
            normalization_options_I=None,
            r_calc_I=None
            ):
        '''Normalize the full data set for an analysis.
        INPUT:
        analysis_id_I = analysis to process
        imputation_methods_I, normalization_methods_I = accepted for interface
            compatibility (currently unused in this method)
        calculated_concentration_units_I = optional subset of concentration
            units; when empty, all units for the analysis are queried
        normalization_method_I = 'gLog' (via R) or one of
            "log2","log10","ln","abs","exp","exp2","^10","^2","sqrt"
            (scaled in Python); unrecognized methods are skipped
        normalization_options_I = options for the gLog normalization
            (default: {'mult':"TRUE",'lowessnorm':"FALSE"})
        r_calc_I = optional pre-initialized R interface (reused to avoid reloading R)
        OUTPUT:
        normalized rows are written to data_preProcessing_replicates and the
        normalization settings to
        data_preProcessing_replicates_imputationAndNormalizations'''

        # BUGFIX: avoid a mutable dict default argument (shared across calls);
        # the effective default is unchanged.
        if normalization_options_I is None:
            normalization_options_I = {'mult':"TRUE",'lowessnorm':"FALSE"};

        # BUGFIX: stale message from a previous method name
        print('execute_normalization_dataSet...')
        # reuse a caller-supplied R interface when provided
        if r_calc_I: r_calc = r_calc_I;
        else: r_calc = r_interface();
        python_calc = calculate_statisticsDescriptive();
        data_normalized = [];
        data_normalizations = [];
        # get the calculated_concentration_units
        if calculated_concentration_units_I:
            calculated_concentration_units = calculated_concentration_units_I;
        else:
            calculated_concentration_units = self.get_calculatedConcentrationUnits_analysisID_dataPreProcessingReplicates(analysis_id_I);
        for cu in calculated_concentration_units:
            print('calculating normalization for concentration_units ' + cu);
            # get the data set
            data = self.get_rows_analysisIDAndCalculatedConcentrationUnits_dataPreProcessingReplicates(
                analysis_id_I,
                cu,
                query_I={},
                output_O='listDict',
                dictColumn_I=None);
            # normalize the data set
            if normalization_method_I == 'gLog':
                # generalized-log normalization computed in R
                data_glog, concentrations, concentrations_glog = r_calc.calculate_glogNormalization(
                    data,
                    normalization_options_I['mult'],
                    normalization_options_I['lowessnorm']);
                data_normalized.extend(data_glog);
            elif normalization_method_I in ["log2","log10","ln","abs","exp","exp2","^10","^2","sqrt"]:
                # simple element-wise scaling computed in Python;
                # rows are updated in place and the units relabeled
                for d in data:
                    normalized_value = python_calc.scale_values(d['calculated_concentration'],normalization_method_I);
                    normalized_units = ('%s_%s_%s' %(d['calculated_concentration_units'],normalization_method_I,'normalized'));
                    d['calculated_concentration'] = normalized_value;
                    d['calculated_concentration_units'] = normalized_units;
                    d['imputation_method'] = None;
                data_normalized.extend(data);
            else:
                print('normalization_method_I not recognized');
                continue;
            # record data normalization method
            tmp = {
                "analysis_id":analysis_id_I,
                "imputation_method":None,
                "imputation_options":None,
                "normalization_method":normalization_method_I,
                "normalization_options":normalization_options_I,
                'calculated_concentration_units':cu,
                "used_":True,
                'comment_I':None
                }
            data_normalizations.append(tmp);
        self.add_rows_table('data_preProcessing_replicates',data_normalized);
        self.add_rows_table('data_preProcessing_replicates_imputationAndNormalizations',data_normalizations);