def _build_SASA_dict(self):
    """Populate ``self.SASA_dict[self.filename]`` from the rows of ``self.data``.

    The entry for ``self.filename`` is reset to an empty dict, then
    ``self._run_POPS()`` and ``self._get_data()`` are called (presumably to
    run POPS and load its output into ``self.data`` -- confirm against those
    methods). Each row is split on tabs and passed through
    ``FileHandlers.clean``; only rows with exactly nine cleaned columns are
    kept.

    Stored layout, keyed by column 2 (position)::

        {position: [aa, tot_SA, SASA, frac_SASA, phob, phil]}

    taken from columns 0, 8, 5, 6, 3 and 4 respectively.

    Returns
    -------
    None
        The result is written to ``self.SASA_dict`` in place.
    """
    file_handlers = FileHandlers()
    self.SASA_dict[self.filename] = {}
    self._run_POPS()
    self._get_data()
    for row in self.data:
        columns = file_handlers.clean(row.split('\t'))
        # Rows that do not have exactly nine columns are not residue records.
        if len(columns) != 9:
            continue
        record = [
            columns[0],  # aa
            columns[8],  # tot_SA
            columns[5],  # SASA
            columns[6],  # frac_SASA
            columns[3],  # phob
            columns[4],  # phil
        ]
        self.SASA_dict[self.filename][columns[2]] = record
def build_SASA_dict(out_files):
    """Build a nested dictionary of per-residue surface-area data.

    Each path in ``out_files`` is read line by line. Lines are split on tabs
    and passed through ``FileHandlers.clean``; only lines with exactly nine
    cleaned columns are kept.

    Parameters
    ----------
    out_files : iterable of str
        Paths of the tab-separated files to parse.

    Returns
    -------
    SASA_dict : dict
        ``{file_name: {position: [aa, tot_SA, SASA, frac_SASA, phob, phil]}}``
        where ``file_name`` is ``FileHandlers.get_file_name(path)``, position
        is column 2 and the list holds columns 0, 8, 5, 6, 3 and 4.

    Raises
    ------
    IOError
        If one of the paths cannot be opened or read.
    """
    # One helper instance is enough; the original built a new one per path
    # and another per line.
    file_handlers = FileHandlers()
    SASA_dict = {}
    for path in out_files:
        file_name = file_handlers.get_file_name(path)
        residues = {}
        SASA_dict[file_name] = residues
        # The context manager closes the handle even if parsing fails.
        with open(path) as handle:
            for line in handle:
                cleaned = file_handlers.clean(line.split('\t'))
                if len(cleaned) != 9:
                    continue
                (position, aa, tot_SA, SASA, frac_SASA, phob, phil) = (
                    cleaned[2], cleaned[0], cleaned[8], cleaned[5],
                    cleaned[6], cleaned[3], cleaned[4])
                residues[position] = [aa, tot_SA, SASA, frac_SASA, phob, phil]
    return SASA_dict
def _build_data_dict(self, file_tag):
    """Rebuild ``self.data_dict`` from the rows of ``self.data``.

    ``self.data_dict`` is reset to an empty dict, ``self._get_data(file_tag)``
    is called (presumably to load ``self.data`` -- confirm against that
    method), and every row is split on tabs and passed through
    ``FileHandlers.clean``. The first cleaned column becomes the key and the
    second column, converted with ``float``, becomes the value.

    Parameters
    ----------
    file_tag
        Forwarded unchanged to ``self._get_data``.

    Returns
    -------
    None
        The result is written to ``self.data_dict`` in place.
    """
    self.data_dict = {}
    self._get_data(file_tag)
    file_handlers = FileHandlers()
    for row in self.data:
        columns = file_handlers.clean(row.split('\t'))
        value = float(columns[1])
        self.data_dict[columns[0]] = value
def BuildRulesDict():
    """Construct a dictionary from the .rul file.

    Each key-value pair is constructed from a single line of the .rul file.
    The .rul file has the following (tab-separated) format:

        this    replace_this
        R       A G
        Y       C T
        M       A C
        K       G T
        S       C G
        W       A T
        H       A C T
        B       C G T
        V       A C G
        D       A G T
        N       A C G T

    Parameters
    ----------
    none

    Returns
    -------
    rules_dict: dict or None
        Dictionary in which the key is a string resulting from joining the
        nucleotides (A, G, C, T) in columns 2-5 of each line from the .rul
        file and the value corresponds to the string in the first column of
        each line of the .rul file. When the same nucleotide string occurs
        more than once, the first occurrence wins.
        ``None`` is returned (after printing a message) if the file cannot
        be read.

    Examples
    --------
    >>> rules_dict = BuildRulesDict()
    """
    file_handlers = FileHandlers()
    rules_file = LoadFiles('rul')
    rules_dict = {}
    try:
        # The context manager closes the handle; the original leaked it.
        with open(rules_file[0]) as handle:
            for line in handle:
                # Skip the header row. The original test,
                # ``('this' and 'replace_this') in line``, only ever checked
                # for 'replace_this' because ``and`` returns its last operand.
                if 'this' in line and 'replace_this' in line:
                    continue
                cleaned = file_handlers.clean(line.split("\t"))
                nucleotides = ''.join(cleaned[1:])
                if nucleotides not in rules_dict:
                    rules_dict[nucleotides] = cleaned[0]
        return rules_dict
    except IOError:
        # Note the space between the two sentences; the original message
        # ran them together.
        print("An error occurred while trying to load the rules file. " +
              "Make sure the file is located in your current working directory.")
        return None
def BuildUsageDict():
    """Build a codon usage dictionary based on the user selected codon usage file

    Useful for downstream calculations involving known codon usage
    frequencies in a given organism.

    Parameters
    ----------
    none

    Returns
    -------
    usage_dict: dict or None
        Dictionary of lists of dictionaries for codon usage. Dictionary has
        the following structure:
        {
            F : [{TTT: 0.58}, {TTC: 0.42}],
            L : [{TTA: 0.14}, {TTG: 0.13}, {CTT: 0.12}, {CTC: 0.1},
                 {CTA: 0.04}, {CTG: 0.47}],
            I : [{ATT: 0.49}, {ATC: 0.39}, {ATA: 0.11}],
            ...
            G : [{GGT: 0.35}, {GGC: 0.37}, {GGA: 0.13}, {GGG: 0.15}]
        }
        The outer key is column 2 of each line, the inner key is column 1
        and the inner value is column 3. Values are stored exactly as
        returned by ``FileHandlers.clean`` (no numeric conversion is done
        here -- presumably they are strings; verify against callers).
        ``None`` is returned (after printing a message) if the file cannot
        be read.

    Examples
    --------
    >>> usage_dict = BuildUsageDict()
    """
    file_handlers = FileHandlers()
    all_files = LoadFiles('txt')
    # Only the path is needed here; the other two values are unused.
    _selection_int, file_path, _file_name = GetDataFile(all_files)
    usage_dict = {}
    header_tokens = ('Codon', 'name', 'prob')
    try:
        # The context manager closes the handle; the original leaked it.
        with open(file_path) as handle:
            for line in handle:
                # Skip the header row. The original test,
                # ``('Codon' and 'name' and 'prob') in line``, only checked
                # for 'prob' because ``and`` returns its last operand.
                if all(token in line for token in header_tokens):
                    continue
                cleaned = file_handlers.clean(line.split("\t"))
                usage_dict.setdefault(cleaned[1], []).append(
                    {cleaned[0]: cleaned[2]})
        return usage_dict
    except IOError:
        # Note the space between the two sentences; the original message
        # ran them together.
        print("An error occurred while trying to load the data file. " +
              "Make sure the file is located in your current working directory.")
        return None
def _build_SASA_dict(self):
    """Populate ``self.SASA_dict[self.filename]`` from the rows of ``self.data``.

    The entry for ``self.filename`` is reset to an empty dict, then
    ``self._run_POPS()`` and ``self._get_data()`` are called (presumably to
    run POPS and load its output into ``self.data`` -- confirm against those
    methods). Each row is split on tabs and passed through
    ``FileHandlers.clean``; only rows with exactly nine cleaned columns are
    kept.

    Stored layout, keyed by column 2 (position)::

        {position: [aa, tot_SA, SASA, frac_SASA, phob, phil]}

    taken from columns 0, 8, 5, 6, 3 and 4 respectively.

    Returns
    -------
    None
        The result is written to ``self.SASA_dict`` in place.
    """
    file_handlers = FileHandlers()
    self.SASA_dict[self.filename] = {}
    self._run_POPS()
    self._get_data()
    for raw_line in self.data:
        cleaned = file_handlers.clean(raw_line.split('\t'))
        if len(cleaned) == 9:
            # Columns 1 and 7 are present in the row but not stored.
            (aa, _col1, position, phob, phil,
             SASA, frac_SASA, _col7, tot_SA) = cleaned
            self.SASA_dict[self.filename][position] = [
                aa, tot_SA, SASA, frac_SASA, phob, phil
            ]
def _parse_ddG_data(self):
    """Parse ``self.ddG_data`` into ``self.ddG_data_map``.

    ``self._get_data()`` is called first (presumably to load
    ``self.ddG_data`` -- confirm against that method). Each entry is split
    on single spaces, passed through ``FileHandlers.clean`` and stripped of
    empty fields. Entries whose second field contains no ``-`` are skipped
    (the original comment marks this as the header line). For the rest, the
    third field is split as ``chain-mutation`` and the mutation is broken
    into its first character, middle characters and last character.

    Resulting layout::

        {(chain, wt_res, position, mut_res): <fourth field>}

    Returns
    -------
    None
        The result is written to ``self.ddG_data_map`` in place.
    """
    file_handlers = FileHandlers()
    self._get_data()
    ddG_data_map = {}
    for line in self.ddG_data:
        # Runs of spaces yield empty fields; drop them in a single pass
        # instead of the repeated count()/remove() scan.
        cleaned = [field for field in file_handlers.clean(line.split(' '))
                   if field != '']
        # NOTE(review): the header test inspects field 1 while the mutation
        # is read from field 2 -- kept as in the original; confirm intent.
        if len(cleaned[1].split("-")) < 2:  # ignore first line
            continue
        chain, mutation = cleaned[2].split("-")
        wt_res, position, mut_res = mutation[0], mutation[1:-1], mutation[-1]
        ddG_data_map[(chain, wt_res, position, mut_res)] = cleaned[3]
    self.ddG_data_map = ddG_data_map
def _build_data_structure(self, lines):
    """Group residue numbers from ``lines`` by annotated chain.

    Each line is split on tabs and passed through ``FileHandlers.clean``;
    column 1 is read as the chain and column 2 as the residue number. For
    every entry of ``self.sequence_annotations`` (element 0 = chain,
    element 1 = dictionary key, element 3 = stored alongside the residues)
    the residue numbers whose chain matches are collected in input order.

    Parameters
    ----------
    lines : iterable of str
        Tab-separated records. They are parsed once, so a one-shot iterator
        serves every annotation (the original re-read ``lines`` for each
        annotation, which only worked for re-iterable inputs).

    Returns
    -------
    feature_data_dict : dict
        ``{gene_name: [[residue numbers of interest], sequence]}``
    """
    file_handlers = FileHandlers()
    feature_data_dict = {}
    # With no annotations the original never touched ``lines``; keep that.
    if len(self.sequence_annotations) == 0:
        return feature_data_dict

    # Parse every line once instead of once per annotation.
    residues_by_chain = {}
    for line in lines:
        cleaned = file_handlers.clean(line.split('\t'))
        chain, residue_number = cleaned[1], cleaned[2]
        residues_by_chain.setdefault(chain, []).append(residue_number)

    for annotation in self.sequence_annotations:
        current_chain, key, sequence = (annotation[0], annotation[1],
                                        annotation[3])
        # Copy so annotations that share a chain do not share one list.
        residues = list(residues_by_chain.get(current_chain, []))
        feature_data_dict[key] = [residues, sequence]
    return feature_data_dict
def GetUserSelection(sorted_dict):
    """Prompt user for selection of amino acids to remove from list

    The prompt is repeated until every entered symbol, after conversion to
    uppercase, is a key of ``sorted_dict``. Input is read with
    ``raw_input``, so this function targets Python 2.

    Parameters
    ----------
    sorted_dict: dict
        Dictionary of lists of dictionaries for codon usage. For example,
        the output of BuildUsageDict() would work as input. In this case,
        any dictionary that has single letter amino acid symbols as keys
        would work.

    Returns
    -------
    aa_list: list
        List of amino acids that the user has entered, converted to
        uppercase. The comma-separated entries are passed through
        ``FileHandlers.clean`` first (presumably this is what strips the
        white space -- confirm against that helper).

    Examples
    --------
    >>> usage_dict = BuildUsageDict()
    >>> selection = GetUserSelection(usage_dict)
    """
    file_handlers = FileHandlers()
    while True:
        selection = raw_input("Choose amino acids to remove (multiple amino " +
                              "acids are indicated as a comma-separated list: ")
        aa_list = [aa.upper()
                   for aa in file_handlers.clean(selection.split(','))]
        # Accept only when every symbol is a known key; otherwise re-prompt.
        if all(aa in sorted_dict for aa in aa_list):
            return aa_list
        print("Invalid entry. You must enter a letter or series of " +
              "comma-separated letters corresponding to the amino acids " +
              "you wish to omit.")