def execute_findAmplificationsAndCalculateStats_fromGff(self,
                experiment_id_I,
                strand_start, strand_stop,
                sample_names_I=None,
                scale_factor=True, downsample_factor=2000,reads_min=1.5,reads_max=5.0, indices_min=200,consecutive_tol=10):
        '''Find amplifications in each sample's gff file, calculate their
        statistics, and add both result sets to the database.

        NOTE: multiple chromosomes not yet supported in sequencing_utilities

        INPUT:
        experiment_id_I = experiment id used to look up sample names and data directories
        strand_start, strand_stop = passed through unchanged to
            gff_coverage.findAndCalculate_amplificationStats_fromGff
        sample_names_I = optional list of sample names; when None or empty the
            sample names are queried for experiment_id_I
        scale_factor, downsample_factor, indices_min, consecutive_tol = passed
            through unchanged to gff_coverage.findAndCalculate_amplificationStats_fromGff
        reads_min, reads_max = accepted for interface compatibility but not used here
            NOTE(review): confirm whether these should be forwarded to gff_coverage

        OUTPUT:
        None; rows are written through add_dataStage01ResequencingAmplifications
        and add_dataStage01ResequencingAmplificationStats
        '''

        # a single gff_coverage object is reused for every sample
        gffcoverage = gff_coverage()

        # an explicit, non-empty sample list wins; otherwise fall back to the query
        if sample_names_I:
            sample_names = sample_names_I
        else:
            sample_names = self.get_sampleNames_experimentID_dataStage01ResequencingCoverage(experiment_id_I)

        data_O = []
        stats_O = []
        for sn in sample_names:
            # the lookup result is indexed below; only element 0 is used
            filename = self.get_dataDirs_experimentIDAndSampleName_dataStage01ResequencingCoverage(experiment_id_I, sn)
            # find amplifications and calculate stats
            gffcoverage.findAndCalculate_amplificationStats_fromGff(
                filename[0], strand_start, strand_stop,
                experiment_id_I=experiment_id_I, sample_name_I=sn,
                indices_min=indices_min, consecutive_tol=consecutive_tol,
                scale_factor=scale_factor, downsample_factor=downsample_factor)
            # copy the per-sample results before clear_data() is called on the reused object
            data_O.extend(copy(gffcoverage.amplifications))
            stats_O.extend(copy(gffcoverage.amplificationStats))
            gffcoverage.clear_data()

        # add data to the DB
        self.add_dataStage01ResequencingAmplifications(data_O)
        self.add_dataStage01ResequencingAmplificationStats(stats_O)
    def import_resequencingCoverageData_add(
        self,
        filename,
        # analysis_id,
        experiment_id,
        sample_name,
        strand_start,
        strand_stop,
        scale_factor=True,
        downsample_factor=2000,
    ):
        """Parse the coverage from a gff file and add it to the database.

        NOTE: multiple chromosomes not yet supported in sequencing_utilities

        INPUT:
        filename = path of the gff file; any name containing ".bam" is rejected
            because .bam to .gff conversion is not yet supported
        experiment_id, sample_name = passed through to
            gff_coverage.extract_coverage_fromGff as experiment_id_I / sample_name_I
        strand_start, strand_stop = converted with int() before use
        scale_factor = converted to bool; the strings "", "0", "false", "f",
            "no", "n" and "off" (any case, surrounding whitespace ignored) are
            read as False, every other string as True
        downsample_factor = converted with float() before use

        OUTPUT:
        None; rows are written through add_dataStage01ResequencingCoverage

        RAISES:
        SystemExit(2) when filename contains ".bam"
        """
        import sys

        # reject unsupported input before any parser object is created
        if ".bam" in filename:
            # TODO convert .bam to .gff using makegff.py from sequencing_utilities
            print("conversion of .bam to .gff not yet supported")
            # sys.exit raises the same SystemExit(2) as the site-provided exit(),
            # but is available even when the site module is not loaded
            sys.exit(2)

        # convert strings to int, bool and float
        strand_start = int(strand_start)
        strand_stop = int(strand_stop)
        downsample_factor = float(downsample_factor)
        if isinstance(scale_factor, str):
            # bool("False") is True, so textual false values are mapped explicitly
            scale_factor = scale_factor.strip().lower() not in (
                "", "0", "false", "f", "no", "n", "off",
            )
        else:
            scale_factor = bool(scale_factor)

        # parse the gff file
        gffcoverage = gff_coverage()
        gffcoverage.extract_coverage_fromGff(
            filename,
            strand_start,
            strand_stop,
            scale_factor=scale_factor,
            downsample_factor=downsample_factor,
            experiment_id_I=experiment_id,
            sample_name_I=sample_name,
        )

        # add data to the database:
        self.add_dataStage01ResequencingCoverage(gffcoverage.coverage)
    def execute_findAmplifications_fromGff(self,
                experiment_id_I,
                strand_start, strand_stop,
                sample_names_I=None,
                scale_factor=True, downsample_factor=0,reads_min=1.5,reads_max=5.0, indices_min=200,consecutive_tol=10):
        '''Find amplifications in each sample's gff file and add them to the database.

        NOTE: multiple chromosomes not yet supported in sequencing_utilities

        INPUT:
        experiment_id_I = experiment id used to look up sample names and data
            directories; also passed positionally to gff_coverage.find_amplifications_fromGff
        strand_start, strand_stop = passed through unchanged to
            gff_coverage.find_amplifications_fromGff
        sample_names_I = optional list of sample names; when None or empty the
            sample names are queried for experiment_id_I
        scale_factor = forwarded as the keyword argument "scale"
        downsample_factor = forwarded as the keyword argument "downsample"
        reads_min, reads_max, indices_min, consecutive_tol = accepted for
            interface compatibility but not used here
            NOTE(review): confirm whether these should be forwarded to gff_coverage

        OUTPUT:
        None; rows are written through add_dataStage01ResequencingAmplifications
        '''

        # a single gff_coverage object is reused for every sample
        gffcoverage = gff_coverage()

        # an explicit, non-empty sample list wins; otherwise fall back to the query
        if sample_names_I:
            sample_names = sample_names_I
        else:
            sample_names = self.get_sampleNames_experimentID_dataStage01ResequencingCoverage(experiment_id_I)

        data_O = []
        for sn in sample_names:
            # the lookup result is indexed below; only element 0 is used
            filename = self.get_dataDirs_experimentIDAndSampleName_dataStage01ResequencingCoverage(experiment_id_I, sn)
            gffcoverage.find_amplifications_fromGff(
                filename[0], strand_start, strand_stop,
                experiment_id_I, sn,
                scale=scale_factor, downsample=downsample_factor)
            # copy the per-sample results before clear_data() is called on the reused object
            data_O.extend(copy(gffcoverage.amplifications))
            gffcoverage.clear_data()

        # add data to the DB
        self.add_dataStage01ResequencingAmplifications(data_O)
    def execute_coverageStats_fromGff(self,
                    experiment_id_I,
                    strand_start,strand_stop,scale_factor=True,downsample_factor=0,
                    sample_names_I=None):
        '''Calculate coverage statistics from each sample's gff file and add
        them to the database.

        NOTE: multiple chromosomes not yet supported in sequencing_utilities

        INPUT:
        experiment_id_I = experiment id used to look up sample names and data
            directories; also forwarded to gff_coverage as experiment_id_I
        strand_start, strand_stop, scale_factor, downsample_factor = passed
            through unchanged to gff_coverage.calculate_coverageStats_fromGff
        sample_names_I = optional list of sample names; when None or empty the
            sample names are queried for experiment_id_I

        OUTPUT:
        None; rows are written through add_dataStage01ResequencingCoverageStats
        '''

        # a single gff_coverage object is reused for every sample
        gffcoverage = gff_coverage()

        # an explicit, non-empty sample list wins; otherwise fall back to the query
        if sample_names_I:
            sample_names = sample_names_I
        else:
            sample_names = self.get_sampleNames_experimentID_dataStage01ResequencingCoverage(experiment_id_I)

        data_O = []
        for sn in sample_names:
            # the lookup result is indexed below; only element 0 is used
            filename = self.get_dataDirs_experimentIDAndSampleName_dataStage01ResequencingCoverage(experiment_id_I, sn)
            # calculate the coverage statistics
            gffcoverage.calculate_coverageStats_fromGff(
                filename[0],
                strand_start, strand_stop,
                scale_factor=scale_factor, downsample_factor=downsample_factor,
                experiment_id_I=experiment_id_I, sample_name_I=sn)
            # copy the per-sample results before clear_data() is called on the reused object
            data_O.extend(copy(gffcoverage.coverageStats))
            gffcoverage.clear_data()

        # add data to the database
        self.add_dataStage01ResequencingCoverageStats(data_O)
# define search paths manually
import sys
# dependency dirs
sys.path.append('C:/Users/dmccloskey-sbrg/Documents/GitHub/sequencing_analysis')
sys.path.append('C:/Users/dmccloskey-sbrg/Documents/GitHub/io_utilities')
sys.path.append('C:/Users/dmccloskey-sbrg/Documents/GitHub/sequencing_utilities')
sys.path.append('C:/Users/dmccloskey-sbrg/Documents/GitHub/calculate_utilities')

from sequencing_analysis.genome_diff import genome_diff
from sequencing_analysis.mutations_lineage import mutations_lineage
from sequencing_analysis.mutations_endpoints import mutations_endpoints
from sequencing_analysis.mutations_heatmap import mutations_heatmap
from sequencing_analysis.gff_coverage import gff_coverage

# Example: analyze the coverage for a particular strain.
# The coverage is first extracted from the gff file.
gffcoverage = gff_coverage()
gffcoverage.extract_coverage_fromGff(
    gff_file='//proline/Users/dmccloskey/Resequencing_DNA/Evo04ptsHIcrrEvo04EP/Evo04ptsHIcrrEvo04EP/data/Evo04ptsHIcrrEvo04EP_reference.gff',
    strand_start=0,
    strand_stop=4640000,
    scale_factor=False,
    downsample_factor=2000,
    experiment_id_I='ALEsKOs01',
    sample_name_I='Evo04ptsHIcrrEvo04EP',
)

# Coverage statistics are then calculated from the same gff file
# (this call uses downsample_factor = 0) and exported.
gffcoverage.calculate_coverageStats_fromGff(
    gff_file='//proline/Users/dmccloskey/Resequencing_DNA/Evo04ptsHIcrrEvo04EP/Evo04ptsHIcrrEvo04EP/data/Evo04ptsHIcrrEvo04EP_reference.gff',
    strand_start=0,
    strand_stop=4640000,
    scale_factor=False,
    downsample_factor=0,
    experiment_id_I='ALEsKOs01',
    sample_name_I='Evo04ptsHIcrrEvo04EP',
)
gffcoverage.export_coverageStats('Evo04ptsHIcrrEvo04EP_coverage.csv')
gffcoverage.export_coverage_js()
# find amplifications
gffcoverage.findAndCalculate_amplificationStats_fromGff(gff_file = '//proline/Users/dmccloskey/Resequencing_DNA/Evo04ptsHIcrrEvo04EP/Evo04ptsHIcrrEvo04EP/data/Evo04ptsHIcrrEvo04EP_reference.gff',