Python validate_profile_freq示例，mpathic.qc.validate_profile_freq Python示例

示例#1

0

显示文件

文件： test_profile_freq.py 项目： irelandb/mpathic

    def test_profile_freq_bincounts(self):
        """ Test the ability of mpathic.profile_freq to count frequencies
        """

        print '\nIn test_profile_freq_bincounts...'
        library_files = glob.glob(self.input_dir + 'library_*.txt')
        library_files += glob.glob(self.input_dir + 'dataset_*.txt')
        good_bin_num = 2
        bad_bin_num = 5
        for file_name in library_files:
            print '\t%s =' % file_name,
            description = file_name.split('_')[-1].split('.')[0]
            executable = lambda:\
                profile_freq.main(io.load_dataset(file_name),bin=good_bin_num)
            print '(bin=%d)' % good_bin_num,

            # If bad or library, then profile_freq.main should raise SortSeqError
            if ('_bad' in file_name) or ('library' in file_name):
                try:
                    self.assertRaises(SortSeqError, executable)
                    print 'badtype,',
                except:
                    print 'good (ERROR).'
                    raise

            # If good, then profile_freq.main should produce a valid df
            elif ('_good' in file_name) or ('dataset' in file_name):
                try:
                    df = executable()
                    qc.validate_profile_freq(df)
                    out_file = self.output_dir+\
                        'profile_freq_bin_%s.txt'%description
                    io.write(df, out_file)
                    io.load_profile_freq(out_file)
                    print 'good,',

                except:
                    print 'bad (ERROR).'
                    raise

            # There are no other options
            else:
                raise SortSeqError('Unrecognized class of file_name.')

            # Should always raise an error if bin num is too large
            executable = lambda:\
                profile_freq.main(io.load_dataset(file_name),bin=bad_bin_num)
            print '(bin=%d)' % bad_bin_num,
            try:
                self.assertRaises(SortSeqError, executable)
                print 'badtype.'
            except:
                print 'good (ERROR).'
                raise

示例#2

0

显示文件

文件： test_profile_freq.py 项目： jbkinney/mpathic

    def test_profile_freq_bincounts(self):
        """ Test the ability of mpathic.profile_freq to count frequencies
        """

        print '\nIn test_profile_freq_bincounts...'
        library_files = glob.glob(self.input_dir+'library_*.txt')
        library_files += glob.glob(self.input_dir+'dataset_*.txt')
        good_bin_num = 2
        bad_bin_num = 5
        for file_name in library_files:
            print '\t%s ='%file_name,
            description = file_name.split('_')[-1].split('.')[0]
            executable = lambda:\
                profile_freq.main(io.load_dataset(file_name),bin=good_bin_num)
            print '(bin=%d)'%good_bin_num,

            # If bad or library, then profile_freq.main should raise SortSeqError
            if ('_bad' in file_name) or ('library' in file_name):
                try:
                    self.assertRaises(SortSeqError,executable)
                    print 'badtype,',
                except:
                    print 'good (ERROR).'
                    raise

            # If good, then profile_freq.main should produce a valid df
            elif ('_good' in file_name) or ('dataset' in file_name):
                try:
                    df = executable()
                    qc.validate_profile_freq(df)
                    out_file = self.output_dir+\
                        'profile_freq_bin_%s.txt'%description
                    io.write(df,out_file)
                    io.load_profile_freq(out_file)
                    print 'good,',

                except:
                    print 'bad (ERROR).'
                    raise

            # There are no other options
            else:
                raise SortSeqError('Unrecognized class of file_name.')

            # Should always raise an error if bin num is too large
            executable = lambda:\
                profile_freq.main(io.load_dataset(file_name),bin=bad_bin_num)
            print '(bin=%d)'%bad_bin_num,
            try:
                self.assertRaises(SortSeqError,executable)
                print 'badtype.'
            except:
                print 'good (ERROR).'
                raise

示例#3

0

显示文件

文件： profile_freq.py 项目： jbkinney/mpathic

def main(dataset_df, bin=None, start=0, end=None):
    """
    Computes character frequencies (0.0 to 1.0) at each position

    The dataset is validated, per-position counts are obtained from
    profile_ct.main, and every 'ct_*' column is divided row-wise by the
    'ct' column to turn counts into frequencies.

    Arguments:
        dataset_df (pd.DataFrame): A dataframe containing a valid dataset.
        bin (int): A bin number specifying which counts to use
            (forwarded unchanged to profile_ct.main)
        start (int): An integer specifying the sequence start position
            (forwarded unchanged to profile_ct.main)
        end (int): An integer specifying the sequence end position
            (forwarded unchanged to profile_ct.main)

    Returns:
        freq_df (pd.DataFrame): The value returned by
            qc.validate_profile_freq(..., fix=True) for a table holding one
            'freq_*' column per 'ct_*' column plus the 'pos' column.
    """

    # Reject malformed input before doing any work
    qc.validate_dataset(dataset_df)

    # Per-position character counts
    counts_df = profile_ct.main(dataset_df, bin=bin, start=start, end=end)

    # Pair each per-character count column with its frequency column name
    ct_cols = []
    freq_cols = []
    for col in counts_df.columns:
        if qc.is_col_type(col, 'ct_'):
            ct_cols.append(col)
            freq_cols.append('freq_' + col.split('_')[1])

    # Divide each row of per-character counts by that row's 'ct' value
    totals = counts_df['ct']
    freq_df = counts_df[ct_cols].div(totals, axis=0)
    freq_df.columns = freq_cols
    freq_df['pos'] = counts_df['pos']

    # Validate (and fix where possible) as a profile_freq dataframe
    return qc.validate_profile_freq(freq_df, fix=True)

示例#4

0

显示文件

文件： test_profile_freq.py 项目： irelandb/mpathic

    def test_profile_freq_totalcounts(self):
        """ Test the ability of mpathic.profile_freq to compute frequencies based on total count values
        """

        print '\nIn test_profile_freq_totalcounts...'
        library_files = glob.glob(self.input_dir + 'library_*.txt')
        library_files += glob.glob(self.input_dir + 'dataset_*.txt')
        for file_name in library_files:
            print '\t%s =' % file_name,
            description = file_name.split('_')[-1].split('.')[0]
            executable = lambda: profile_freq.main(io.load_dataset(file_name))

            # If good, then profile_freq.main should produce a valid df
            if '_good' in file_name:
                try:
                    df = executable()
                    qc.validate_profile_freq(df)
                    out_file = self.output_dir+\
                        'profile_freq_total_%s.txt'%description
                    io.write(df, out_file)
                    io.load_profile_freq(out_file)
                    print 'good.'
                except:
                    print 'bad (ERROR).'
                    raise

            # If bad, then profile_freq.main should raise SortSeqError
            elif '_bad' in file_name:
                try:
                    self.assertRaises(SortSeqError, executable)
                    print 'badtype.'
                except:
                    print 'good (ERROR).'
                    raise

            # There are no other options
            else:
                raise SortSeqError('Unrecognized class of file_name.')

示例#5

0

显示文件

文件： test_profile_freq.py 项目： jbkinney/mpathic

    def test_profile_freq_totalcounts(self):
        """ Test the ability of mpathic.profile_freq to compute frequencies based on total count values
        """

        print '\nIn test_profile_freq_totalcounts...'
        library_files = glob.glob(self.input_dir+'library_*.txt')
        library_files += glob.glob(self.input_dir+'dataset_*.txt')
        for file_name in library_files:
            print '\t%s ='%file_name,
            description = file_name.split('_')[-1].split('.')[0]
            executable = lambda: profile_freq.main(io.load_dataset(file_name))

            # If good, then profile_freq.main should produce a valid df
            if '_good' in file_name:
                try:
                    df = executable()
                    qc.validate_profile_freq(df)
                    out_file = self.output_dir+\
                        'profile_freq_total_%s.txt'%description
                    io.write(df,out_file)
                    io.load_profile_freq(out_file)
                    print 'good.'
                except:
                    print 'bad (ERROR).'
                    raise

            # If bad, then profile_freq.main should raise SortSeqError
            elif '_bad' in file_name:
                try:
                    self.assertRaises(SortSeqError,executable)
                    print 'badtype.'
                except:
                    print 'good (ERROR).'
                    raise

            # There are no other options
            else:
                raise SortSeqError('Unrecognized class of file_name.')