def cache_parameters(self):
    """Write parameters and hmms, first from the Waterer pass and then from repeated viterbi passes.

    Steps:
      1. Run Waterer with write_parameters=True into <parameter_dir>/sw (plots in <plotdir>/sw),
         then write hmms from its 'all_best_matches'.
      2. For each of args.baum_welch_iterations, run the 'viterbi' hmm reading the previous
         step's parameter dir and writing <parameter_dir>/hmm (plots in <plotdir>/hmm), then write
         hmms into that new dir. When there is more than one iteration, '-<iteration index>' is
         appended to both dir names so the iterations don't overwrite each other.
      3. Unless args.no_clean is set, remove args.workdir with os.rmdir (which only succeeds on
         an empty directory).

    Requires args.n_sets == 1 and args.plotdir to be set (both enforced with assert).
    """
    args = self.args
    assert args.n_sets == 1  # er, could do it for n > 1, but I'd want to think through a few things first
    assert args.plotdir is not None

    # first pass: Waterer both infers and writes the 'sw' parameters
    sw_parameter_dir = args.parameter_dir + '/sw'
    waterer = Waterer(
        args,
        self.input_info,
        self.reco_info,
        self.germline_seqs,
        parameter_dir=sw_parameter_dir,
        write_parameters=True,
        plotdir=args.plotdir + '/sw',
    )
    waterer.run()
    best_matches_key = 'all_best_matches'
    self.write_hmms(sw_parameter_dir, waterer.info[best_matches_key])

    # subsequent passes: each iteration reads the parameters written by the one before it
    n_iterations = args.baum_welch_iterations
    previous_dir = sw_parameter_dir
    for iteration in range(n_iterations):
        suffix = ''
        if args.baum_welch_iterations > 1:  # only number the dirs if there's more than one of them
            suffix = '-' + str(iteration)
        current_dir = args.parameter_dir + '/hmm' + suffix
        current_plotdir = args.plotdir + '/hmm' + suffix

        self.run_hmm(
            'viterbi',
            waterer.info,
            parameter_in_dir=previous_dir,
            parameter_out_dir=current_dir,
            hmm_type='k=1',
            count_parameters=True,
            plotdir=current_plotdir,
        )
        self.write_hmms(current_dir, waterer.info[best_matches_key])
        previous_dir = current_dir

    if args.no_clean:
        return
    os.rmdir(args.workdir)
def run_algorithm(self, algorithm):
    """Run Waterer and then a single hmm pass with the requested algorithm, using existing parameters.

    algorithm: passed straight through as the first argument of self.run_hmm.

    Raises Exception if args.parameter_dir does not exist. Parameters are only read here
    (Waterer is built with write_parameters=False); parameter counting in the hmm step is switched
    on by args.plot_parameters. Unless args.no_clean is set, args.workdir is removed at the end
    with os.rmdir (which only succeeds on an empty directory).
    """
    args = self.args
    if not os.path.exists(args.parameter_dir):
        raise Exception('ERROR ' + args.parameter_dir + ' d.n.e')

    waterer = Waterer(
        args,
        self.input_info,
        self.reco_info,
        self.germline_seqs,
        parameter_dir=args.parameter_dir,
        write_parameters=False,
    )
    waterer.run()

    self.run_hmm(
        algorithm,
        waterer.info,
        parameter_in_dir=args.parameter_dir,
        hmm_type='k=nsets',
        count_parameters=args.plot_parameters,
        plotdir=args.plotdir,
    )

    # self.clean(waterer)
    if args.no_clean:
        return
    os.rmdir(args.workdir)
def partition(self):
    """Cluster the input: Waterer, then hamming preclustering, then two 'forward' hmm passes.

    Steps:
      1. Run Waterer against the existing parameters (write_parameters=False).
      2. Hamming preclustering. Preclustering on cdr3 length is currently switched off (see the
         comment on cdr3_cluster), so hamming_precluster is handed None.
      3. 'forward' hmm with hmm_type 'k=2' on the hamming preclusters, making clusters.
      4. 'forward' hmm with hmm_type 'k=preclusters' on the clusters from step 3, without making
         clusters, with prefix 'k-'.
      5. Unless args.no_clean is set, remove args.workdir with os.rmdir (which only succeeds on
         an empty directory).

    Raises Exception if args.parameter_dir does not exist. This is raised explicitly, with the
    same message run_algorithm uses, rather than checked with assert, so that the check is still
    performed when python is run with -O.
    """
    if not os.path.exists(self.args.parameter_dir):
        raise Exception('ERROR ' + self.args.parameter_dir + ' d.n.e')

    # run smith-waterman
    waterer = Waterer(self.args, self.input_info, self.reco_info, self.germline_seqs,
                      parameter_dir=self.args.parameter_dir, write_parameters=False)
    waterer.run()

    # cdr3 length partitioning
    # don't precluster on cdr3 length for the moment -- I cannot accurately infer cdr3 length in
    # some sequences, so I need a way to pass query seqs to the clusterer with several possible
    # cdr3 lengths (and I don't know how to do that!)
    cdr3_cluster = False
    cdr3_length_clusters = None
    if cdr3_cluster:
        cdr3_length_clusters = self.cdr3_length_precluster(waterer)

    hamming_clusters = self.hamming_precluster(cdr3_length_clusters)
    # stripped_clusters = self.run_hmm('forward', waterer.info, self.args.parameter_dir, preclusters=hamming_clusters, stripped=True)
    hmm_clusters = self.run_hmm('forward', waterer.info, self.args.parameter_dir,
                                preclusters=hamming_clusters, hmm_type='k=2', make_clusters=True)
    self.run_hmm('forward', waterer.info, self.args.parameter_dir,
                 preclusters=hmm_clusters, hmm_type='k=preclusters', prefix='k-', make_clusters=False)
    # self.run_hmm('viterbi', waterer.info, self.args.parameter_dir, preclusters=hmm_clusters, hmm_type='k=preclusters', prefix='k-', make_clusters=False)

    # self.clean(waterer)
    if not self.args.no_clean:
        os.rmdir(self.args.workdir)