Пример #1
0
#!/usr/bin/env python
import os
import csv
import time
import sys
from subprocess import check_call

import utils
from opener import opener
import plotting
from mutefreqer import MuteFreqer
# Module-level flag holding whatever plotting.check_root() returns, evaluated once at import time.
# NOTE(review): presumably truthy when ROOT is available for plotting -- confirm in plotting.py
has_root = plotting.check_root()

# ----------------------------------------------------------------------------------------
class ParameterCounter(object):
    """ class to keep track of how many times we've seen each gene version, erosion length,
    insertion (length and base content), and mutation """
    def __init__(self, germline_seqs):
        """ Start every counter off empty and build the mutation-frequency tracker.

        germline_seqs is handed straight to MuteFreqer; nothing else in this constructor uses it.
        """
        # (optional args that are currently disabled: base_outdir, plotdir, write_parameters, plot_parameters)
        self.total = 0

        # one (initially empty) table for 'all', followed by one for each entry in utils.column_dependencies
        self.counts = {'all': {}}
        self.counts.update((column, {}) for column in utils.column_dependencies)

        # zeroed A/C/G/T tallies: one per boundary (base content of each insertion), plus 'seq_content'
        content_keys = [bound + '_insertion_content' for bound in utils.boundaries]
        content_keys.append('seq_content')
        for key in content_keys:
            self.counts[key] = dict.fromkeys('ACGT', 0)  # fresh dict for every key, so the tallies stay independent

        self.mutefreqer = MuteFreqer(germline_seqs)

    # ----------------------------------------------------------------------------------------
    def clean(self):
        """ remove all the parameter files """
#!/usr/bin/env python
import os
import csv
import time
import sys
from subprocess import check_call

import utils
from opener import opener
import plotting
from mutefreqer import MuteFreqer

# Module-level flag holding whatever plotting.check_root() returns, evaluated once at import time.
# NOTE(review): presumably truthy when ROOT is available for plotting -- confirm in plotting.py
has_root = plotting.check_root()


# ----------------------------------------------------------------------------------------
class ParameterCounter(object):
    """ class to keep track of how many times we've seen each gene version, erosion length,
    insertion (length and base content), and mutation """
    def __init__(
        self, germline_seqs
    ):  #, base_outdir='', plotdir='', write_parameters=True, plot_parameters=True):
        self.total = 0
        self.counts = {}
        self.counts['all'] = {}
        for column in utils.column_dependencies:
            self.counts[column] = {}
        for bound in utils.boundaries:
            self.counts[bound + '_insertion_content'] = {
                'A': 0,
                'C': 0,
import sys
import utils
import plotting
from hist import Hist
from subprocess import check_call

# Refuse to import this module unless plotting.check_root() returns something truthy.
# NOTE(review): presumably this means ROOT is usable -- confirm in plotting.py. Also note that assert
# statements are dropped when python runs with -O, in which case this check (and the call) is skipped.
assert plotting.check_root()

# Columns for which we only want to know whether we guessed the right value
# (for the other columns, we store guess - true).
bool_columns = ('v_gene', 'd_gene', 'j_gene')

class PerformancePlotter(object):
    # ----------------------------------------------------------------------------------------
    def __init__(self, germlines, plotdir, name):
        self.germlines = germlines
        self.plotdir = plotdir
        self.name = name
        utils.prep_dir(self.plotdir + '/plots', wildling=None, multilings=['*.csv', '*.svg', '*.root'])
        self.values = {}
        for column in utils.index_columns:
            if column == 'cdr3_length':  # kind of finicky to figure out what this is, so I don't always set it
                continue
            self.values[column] = {}
            if column in bool_columns:
                self.values[column]['right'] = 0
                self.values[column]['wrong'] = 0
        self.values['hamming_to_true_naive'] = {}
        for region in utils.regions:
            self.values[region + '_hamming_to_true_naive'] = {}
            self.values[region + '_hamming_to_true_naive_normed'] = {}
        # for bound in utils.boundaries: