def loadSeqAndChain(seq_file, k, suppress_save=False, mc_file=None, retain_n=False):
    """Load a FASTA sequence together with its Markov chain list.

    The Markov chain list is read from ``mc_file`` when that file already
    exists.  Otherwise it is built from the sequence and, unless
    ``suppress_save`` is set, written to ``mc_file`` for the next run.

    Parameters:
    * seq_file: Path of the FASTA file; it must hold exactly one record
      (``SeqIO.read`` is used).
    * k: Passed to ``markov_gen.MarkovArray`` and used in the derived cache
      file name (``.pmc<k>``).
    * suppress_save: If true, a freshly built chain is not written to disk.
    * mc_file: Cache file to use.  When None it is derived from ``seq_file``
      by replacing a trailing ``.fa`` / ``.fasta`` with ``.pmc<k>``; if
      ``seq_file`` has neither extension the suffix is appended instead.
    * retain_n: If false, the longest prefix and suffix made of characters
      not in ``bases`` are removed from the sequence.

    Return: A tuple:
    1. The (possibly trimmed) sequence as a string.
    2. The Markov chain list.
    3. The number of characters removed from the front of the sequence
       (0 when ``retain_n`` is true).
    """
    template_seq = str(SeqIO.read(seq_file, 'fasta').seq)

    # Cut out the maximal prefix and suffix of ambiguity codes.
    # NOTE(review): the original comment states these have no effect on the
    # Markov chain construction -- not verifiable from this function alone.
    coord_adjust = 0
    if not retain_n:
        start, finish = 0, len(template_seq)
        # Both scans are bounded so that an empty sequence, or one with no
        # character in `bases`, yields an empty string rather than IndexError.
        while start < finish and template_seq[start] not in bases:
            start += 1
        while finish > start and template_seq[finish - 1] not in bases:
            finish -= 1
        coord_adjust = start
        template_seq = template_seq[start:finish]

    if mc_file is None:
        mc_file = re.sub(r"\.(fa|fasta)$", ".pmc%d" % (k), seq_file)
        if mc_file == seq_file:
            # No recognised extension: without this the cache path would be
            # the FASTA file itself, which always exists and would be handed
            # to read_pmck below.
            mc_file = "%s.pmc%d" % (seq_file, k)

    if os.path.exists(mc_file):
        # NOTE(review): read_pmck is presumably the inverse of
        # pickle_markov_list; only point mc_file at trusted files.
        markov_list = markov_gen.read_pmck(mc_file)
    else:
        markov_list = markov_gen.MarkovArray(k, template_seq)
        if not suppress_save:
            markov_gen.pickle_markov_list(markov_list, mc_file)

    return template_seq, markov_list, coord_adjust
def loadSeqAndChain(seq_file, k, suppress_save=False, mc_file=None, retain_n=False):
    """Load the sequence and the Markov Chain List.

    Load the MC list from a file if it exists.  If not, create the chain and
    save it to the file for the next use (skip the save if suppressed).

    Parameters:
    * seq_file: The sequence file (FASTA, exactly one record).
    * k: The order of the markov chain.
    * suppress_save: Boolean.  If true, don't save the generated MC file.
    * mc_file: The name of the mc_file to use.  When None it is derived from
      seq_file by replacing a trailing ``.fa`` / ``.fasta`` with ``.pmc<k>``;
      if seq_file has neither extension the suffix is appended instead.
    * retain_n: If false, the largest possible prefix and suffix of
      characters not in ``bases`` are excluded from the reported
      [start, finish) range.  The returned sequence itself is never cut.

    Return: A tuple:
    1. The chromosome sequence (full length, untrimmed).
    2. The markov chain.
    3. Where we will start in the template sequence (index of the first
       character in ``bases``; 0 if retain_n is true).
    4. Where we will end in the template sequence (one past the last
       character in ``bases``; the sequence length if retain_n is true).
    """
    template_seq = str(SeqIO.read(seq_file, 'fasta').seq)

    start, finish = 0, len(template_seq)
    if not retain_n:
        # Move both ends inward to the first real base on each side.  The
        # scans are bounded so that an empty sequence, or one containing no
        # character in `bases`, gives start == finish instead of IndexError.
        while start < finish and template_seq[start] not in bases:
            start += 1
        while finish > start and template_seq[finish - 1] not in bases:
            finish -= 1

    if mc_file is None:
        mc_file = re.sub(r"\.(fa|fasta)$", ".pmc%d" % (k), seq_file)
        if mc_file == seq_file:
            # No recognised extension: without this the cache path would be
            # the sequence file itself, which always exists and would be
            # handed to read_pmck below.
            mc_file = "%s.pmc%d" % (seq_file, k)

    if os.path.exists(mc_file):
        # NOTE(review): read_pmck is presumably the inverse of
        # pickle_markov_list; only point mc_file at trusted files.
        markov_list = markov_gen.read_pmck(mc_file)
    else:
        # The chain is built from the full sequence, ambiguity codes included.
        markov_list = markov_gen.MarkovArray(k, template_seq)
        if not suppress_save:
            markov_gen.pickle_markov_list(markov_list, mc_file)

    return template_seq, markov_list, start, finish
def loadSeqAndChain(seq_file, k, suppress_save=False, mc_file=None, retain_n=False):
    """Load the sequence and the Markov Chain List.

    Load the MC list from a file if it exists.  If not, create the chain and
    save it to the file for the next use (skip the save if suppressed).

    Parameters:
    * seq_file: The sequence file (FASTA, exactly one record).
    * k: Passed to ``markov_gen.MarkovArray``; also part of the derived
      cache file name.
    * suppress_save: If true, a newly built chain is not saved.
    * mc_file: Cache file name.  When None it is derived from seq_file by
      replacing a trailing ``.fa`` / ``.fasta`` with ``.pmc<k>``, or by
      appending ``.pmc<k>`` when seq_file has neither extension.
    * retain_n: If false, leading and trailing characters that are not in
      ``bases`` are stripped from the sequence.

    Return: A tuple:
    1. The sequence string (trimmed unless retain_n is true).
    2. The markov chain list.
    3. The offset of the returned sequence within the original one, i.e.
       the length of the removed prefix (0 if retain_n is true).
    """
    template_seq = str(SeqIO.read(seq_file, 'fasta').seq)

    if retain_n:
        coord_adjust = 0
    else:
        # Locate the first and last real base.  Each scan stops when the two
        # ends meet, so a sequence with no character in `bases` (or an empty
        # one) trims to "" instead of raising IndexError.
        start = 0
        finish = len(template_seq)
        while start < finish and template_seq[start] not in bases:
            start += 1
        while finish > start and template_seq[finish - 1] not in bases:
            finish -= 1
        coord_adjust = start
        template_seq = template_seq[start:finish]

    if mc_file is None:
        mc_file = re.sub(r"\.(fa|fasta)$", ".pmc%d" % (k), seq_file)
        if mc_file == seq_file:
            # The extension did not match, so the substitution was a no-op.
            # Append the suffix so the cache never aliases the input file.
            mc_file = "%s.pmc%d" % (seq_file, k)

    if os.path.exists(mc_file):
        # NOTE(review): read_pmck is presumably the inverse of
        # pickle_markov_list; only point mc_file at trusted files.
        markov_list = markov_gen.read_pmck(mc_file)
    else:
        markov_list = markov_gen.MarkovArray(k, template_seq)
        if not suppress_save:
            markov_gen.pickle_markov_list(markov_list, mc_file)

    return template_seq, markov_list, coord_adjust
def loadSeqAndChain(seq_file, k, suppress_save=False, mc_file=None, retain_n=False):
    """Load the sequence and the Markov Chain List.

    Load the MC list from a file if it exists.  If not, create the chain and
    save it to the file for the next use (skip the save if suppressed).

    Parameters:
    * seq_file: The sequence file (FASTA, exactly one record).
    * k: The order of the markov chain.
    * suppress_save: Boolean.  If true, don't save the generated MC file.
    * mc_file: The name of the mc_file to use.  (Derived from seq_file if
      not provided: a trailing ``.fa`` / ``.fasta`` is replaced by
      ``.pmc<k>``, otherwise ``.pmc<k>`` is appended.)
    * retain_n: If false, the largest possible prefix and suffix of
      characters outside ``bases`` are excluded from the returned
      start/finish coordinates.  The sequence string is returned whole
      either way.

    Return: A tuple:
    1. The chromosome sequence.
    2. The markov chain.
    3. Where we will start in the template sequence (in case a prefix has
       been skipped).
    4. Where we will end in the template sequence, exclusive (in case a
       suffix has been skipped).
    """
    template_seq = str(SeqIO.read(seq_file, 'fasta').seq)

    start = 0
    finish = len(template_seq)
    if not retain_n:
        # Skip leading and trailing ambiguity codes.  The bounds checks keep
        # an empty or all-ambiguous sequence from running off either end
        # (previously an IndexError); it yields start == finish.
        while start < finish and template_seq[start] not in bases:
            start += 1
        while finish > start and template_seq[finish - 1] not in bases:
            finish -= 1

    if mc_file is None:
        mc_file = re.sub(r"\.(fa|fasta)$", ".pmc%d" % (k), seq_file)
        if mc_file == seq_file:
            # Unrecognised extension: the substitution changed nothing, so
            # append the suffix rather than treating the sequence file as
            # its own cache.
            mc_file = "%s.pmc%d" % (seq_file, k)

    if os.path.exists(mc_file):
        # NOTE(review): read_pmck is presumably the inverse of
        # pickle_markov_list; only point mc_file at trusted files.
        markov_list = markov_gen.read_pmck(mc_file)
    else:
        # Built from the whole sequence; start/finish are only reported.
        markov_list = markov_gen.MarkovArray(k, template_seq)
        if not suppress_save:
            markov_gen.pickle_markov_list(markov_list, mc_file)

    return template_seq, markov_list, start, finish