Пример #1
0
	def _build_SASA_dict(self):
		"""Fill ``self.SASA_dict[self.filename]`` from the rows of ``self.data``.

		The entry for ``self.filename`` is reset to an empty dict, then
		``self._run_POPS()`` and ``self._get_data()`` are invoked
		(presumably they produce and load ``self.data`` -- confirm against
		their definitions). Each row is split on tabs and cleaned; rows
		that do not yield exactly nine cleaned fields are ignored.

		For every accepted row the value in column 2 is used as the key
		and the stored list is, in order, columns 0, 8, 5, 6, 3 and 4
		(named aa, tot_SA, SASA, frac_SASA, phob and phil by this code).
		"""
		file_handlers = FileHandlers()
		self.SASA_dict[self.filename] = {}
		self._run_POPS()
		self._get_data()
		for row in self.data:
			columns = file_handlers.clean(row.split('\t'))
			if len(columns) != 9:
				# Only nine-column rows are treated as data rows.
				continue
			position = columns[2]
			self.SASA_dict[self.filename][position] = [
				columns[0],  # aa
				columns[8],  # tot_SA
				columns[5],  # SASA
				columns[6],  # frac_SASA
				columns[3],  # phob
				columns[4],  # phil
			]
Пример #2
0
def build_SASA_dict(out_files):
	"""Build a nested dictionary of per-position values from tab-separated files.

	Each file is read line by line. A line is split on tabs and cleaned
	with ``FileHandlers.clean``; only lines that yield exactly nine cleaned
	fields are kept.

	Parameters
	----------
	out_files: iterable
		Paths of the files to read.

	Returns
	-------
	SASA_dict: dict
		``{file_name: {position: [aa, tot_SA, SASA, frac_SASA, phob, phil]}}``
		where ``file_name`` is ``FileHandlers.get_file_name(path)``,
		``position`` is column 2 and the list holds columns 0, 8, 5, 6, 3
		and 4, in that order.
	"""
	# One helper instance is enough; it does not need rebuilding per line.
	file_handlers = FileHandlers()
	SASA_dict = {}
	for path in out_files:
		file_name = file_handlers.get_file_name(path)
		per_position = SASA_dict[file_name] = {}
		# ``with`` guarantees the handle is closed even if parsing fails.
		with open(path) as handle:
			for line in handle:
				cleaned = file_handlers.clean(line.split('\t'))
				if len(cleaned) != 9:
					# Only nine-column rows are treated as data rows.
					continue
				position = cleaned[2]
				per_position[position] = [
					cleaned[0],  # aa
					cleaned[8],  # tot_SA
					cleaned[5],  # SASA
					cleaned[6],  # frac_SASA
					cleaned[3],  # phob
					cleaned[4],  # phil
				]
	return SASA_dict
Пример #3
0
	def _build_data_dict(self, file_tag):
		"""Reset ``self.data_dict`` and refill it from ``self.data``.

		``self._get_data(file_tag)`` is called first (presumably it loads
		``self.data`` -- confirm against its definition). Every line is
		then split on tabs and cleaned; the first cleaned field becomes
		the key and the second, converted with ``float``, the value.

		Parameters
		----------
		file_tag:
			Passed through unchanged to ``self._get_data``.
		"""
		self.data_dict = {}
		self._get_data(file_tag)
		file_handlers = FileHandlers()
		for record in self.data:
			columns = file_handlers.clean(record.split('\t'))
			label, raw_value = columns[0], columns[1]
			self.data_dict[label] = float(raw_value)
Пример #4
0
 def _build_data_dict(self, file_tag):
     """Rebuild ``self.data_dict`` as a mapping of name -> float.

     The dictionary is emptied, ``self._get_data(file_tag)`` is invoked
     (presumably to populate ``self.data`` -- confirm against its
     definition), and each tab-separated line of ``self.data`` then
     contributes one entry: cleaned column 0 is the key, cleaned
     column 1 parsed with ``float`` is the value.

     Parameters
     ----------
     file_tag:
         Forwarded as-is to ``self._get_data``.
     """
     self.data_dict = {}
     self._get_data(file_tag)
     file_handlers = FileHandlers()
     for entry in self.data:
         parts = file_handlers.clean(entry.split('\t'))
         name = parts[0]
         number = float(parts[1])
         self.data_dict[name] = number
Пример #5
0
def BuildRulesDict():
	"""Construct a dictionary from the .rul file. Each key-value pair is
	constructed from a single line of the .rul file. The .rul file has the
	following format:

	this	replace_this
	R	A	G
	Y	C	T
	M	A	C
	K	G	T
	S	C	G
	W	A	T
	H	A	C	T
	B	C	G	T
	V	A	C	G
	D	A	G	T
	N	A	C	G	T

	The file used is the first entry returned by ``LoadFiles('rul')``.

	Parameters
	----------
	none

	Returns
	-------
	rules_dict: dict
		dictionary in which the key is a string resulting from joining the
		nucleotides (A, G, C, T) in columns 2-5 of each line from the .rul
		file and the value corresponds to the string in the first column of
		each line of the .rul file. When the same joined key occurs more
		than once, the first occurrence wins. ``None`` is returned if the
		file cannot be read (an error message is printed instead).

	Examples
	--------
	>>> rules_dict = BuildRulesDict()
	"""
	file_handlers = FileHandlers()
	rules_file = LoadFiles('rul')
	rules_dict = {}
	try:
		# ``with`` closes the handle even if a line fails to parse.
		with open(rules_file[0]) as handle:
			for line in handle:
				# Skip the header row. Both words are tested explicitly:
				# ``('this' and 'replace_this') in line`` only ever tested
				# the last operand.
				if 'this' in line and 'replace_this' in line:
					continue
				cleaned = file_handlers.clean(line.split("\t"))
				key = ''.join(cleaned[1:])
				if key not in rules_dict:
					rules_dict[key] = cleaned[0]
		return rules_dict
	except IOError:
		print("An error occurred while trying to load the rules file." +
		"Make sure the file is located in your current working directory.")
		return None
Пример #6
0
def BuildUsageDict():
	"""Build a codon usage dictionary based on the user selected codon usage
	file

	Useful for downstream calculations involving known codon usage frequencies
	in a given organism

	The candidate files come from ``LoadFiles('txt')`` and the file to read
	is the path returned by ``GetDataFile``. Each line is split on tabs and
	cleaned; the header line (the one containing all of 'Codon', 'name' and
	'prob') is skipped.

	Parameters
	----------
	none

	Returns
	-------
	usage_dict: dict
		Dictionary of lists of dictionaries for codon usage, keyed by the
		second column, each inner dictionary mapping the first column to
		the third column exactly as returned by ``FileHandlers.clean``.
		Dictionary has the following structure:
		{
			F : [{TTT: 0.58}, {TTC: 0.42}],
			L : [{TTA: 0.14}, {TTG: 0.13}, {CTT: 0.12}, {CTC: 0.1},
					{CTA: 0.04}, {CTG: 0.47}],
			I : [{ATT: 0.49}, {ATC: 0.39}, {ATA: 0.11}],
			...
			...
			...
			G : [{GGT: 0.35}, {GGC: 0.37}, {GGA: 0.13}, {GGG: 0.15}]
		}
		``None`` is returned if the file cannot be read (an error message
		is printed instead).

	Examples
	--------
	>>> usage_dict = BuildUsageDict()
	"""
	file_handlers = FileHandlers()
	all_files = LoadFiles('txt')
	# Only the path is needed here; the other two values are ignored.
	_, file_path, _ = GetDataFile(all_files)
	header_tokens = ('Codon', 'name', 'prob')
	usage_dict = {}
	try:
		# ``with`` closes the handle even if a line fails to parse.
		with open(file_path) as handle:
			for line in handle:
				# ``('Codon' and 'name' and 'prob') in line`` only tested
				# 'prob'; require every header word to identify the header.
				if all(token in line for token in header_tokens):
					continue
				cleaned = file_handlers.clean(line.split("\t"))
				entry = {cleaned[0]: cleaned[2]}
				usage_dict.setdefault(cleaned[1], []).append(entry)
		return usage_dict
	except IOError:
		print("An error occurred while trying to load the data file." +
		"Make sure the file is located in your current working directory.")
		return None
Пример #7
0
 def _build_SASA_dict(self):
     """Store one list of values per position under ``self.filename``.

     ``self.SASA_dict[self.filename]`` is reset to an empty dict before
     ``self._run_POPS()`` and ``self._get_data()`` are called (presumably
     these generate and load ``self.data`` -- confirm against their
     definitions). Every line of ``self.data`` is split on tabs and
     cleaned, and lines without exactly nine cleaned fields are skipped.

     The key is cleaned column 2; the value is a list of columns
     0, 8, 5, 6, 3 and 4, which this code calls aa, tot_SA, SASA,
     frac_SASA, phob and phil.
     """
     file_handlers = FileHandlers()
     self.SASA_dict[self.filename] = {}
     self._run_POPS()
     self._get_data()
     # Column order of the stored list: aa, tot_SA, SASA, frac_SASA,
     # phob, phil.
     value_columns = (0, 8, 5, 6, 3, 4)
     for record in self.data:
         parts = file_handlers.clean(record.split('\t'))
         if len(parts) != 9:
             continue
         position = parts[2]
         self.SASA_dict[self.filename][position] = [
             parts[index] for index in value_columns
         ]
Пример #8
0
	def _parse_ddG_data(self):
		"""Parse ``self.ddG_data`` into ``self.ddG_data_map``.

		``self._get_data()`` is called first (presumably it loads
		``self.ddG_data`` -- confirm against its definition). Each record
		is split on single spaces and cleaned, and empty tokens are
		dropped. Records whose second token contains no "-" are skipped
		(the original comment says this ignores the first line).

		For the remaining records the third token is split on "-" into a
		chain and a mutation string. The mutation string is broken into
		its first character, its middle part and its last character, and
		the fourth token is stored under the key
		``(chain, wt_res, position, mut_res)``.
		"""
		file_handlers = FileHandlers()
		self._get_data()
		parsed = {}
		for record in self.ddG_data:
			tokens = [
				token
				for token in file_handlers.clean(record.split(' '))
				if token != ''
			]
			if len(tokens[1].split("-")) < 2:
				# No "-" in the second token: not a data row.
				continue
			chain, mutation = tokens[2].split("-")
			wt_res = mutation[0]
			position = mutation[1:-1]
			mut_res = mutation[-1]
			parsed[(chain, wt_res, position, mut_res)] = tokens[3]
		self.ddG_data_map = parsed
Пример #9
0
 def _build_data_structure(self, lines):
     """Group residue numbers from ``lines`` by annotated chain.

     Each line is split on tabs and cleaned; cleaned column 1 is read as
     the chain and cleaned column 2 as the residue number. For every
     entry of ``self.sequence_annotations`` the residues whose chain
     equals the entry's element 0 are collected, in line order.

     The lines are parsed a single time and indexed by chain, instead of
     being re-split and re-cleaned once per annotation. This also means a
     one-shot iterator can be passed as ``lines``.

     Parameters
     ----------
     lines: iterable
         Tab-separated text lines.

     Returns
     -------
     feature_data_dict: dict
         ``{annotation[1]: [[residue numbers], annotation[3]]}`` for each
         annotation in ``self.sequence_annotations``. If two annotations
         share the same element 1, the later one replaces the earlier.
     """
     file_handlers = FileHandlers()
     feature_data_dict = {}
     annotations = self.sequence_annotations
     if len(annotations) == 0:
         # Nothing to annotate, so the lines are never inspected.
         return feature_data_dict

     # Single pass over the input: chain -> residue numbers in line order.
     residues_by_chain = {}
     for line in lines:
         cleaned = file_handlers.clean(line.split('\t'))
         chain = cleaned[1]
         residue_number = cleaned[2]
         residues_by_chain.setdefault(chain, []).append(residue_number)

     for annotation in annotations:
         current_chain = annotation[0]
         # Copy so annotations on the same chain do not share one list.
         residues = list(residues_by_chain.get(current_chain, []))
         feature_data_dict[annotation[1]] = [residues, annotation[3]]
     # {gene_name: [ [residue numbers of interest], sequence ]}
     return feature_data_dict
Пример #10
0
def GetUserSelection(sorted_dict):
	"""Prompt user for selection of amino acids to remove from list

	The prompt is repeated until every entered symbol, once uppercased, is
	a key of ``sorted_dict``.

	Parameters
	----------
	sorted_dict: dict
		Dictionary of lists of dictionaries for codon usage. For example, the
		output of BuildUsageDict() would work as input. In this case, any
		dictionary that has single letter amino acid symbols as keys would
		work

	Returns
	-------
	aa_list: list
		List of amino acids that the user has entered. The comma-separated
		entries are passed through ``FileHandlers.clean`` (presumably this
		strips white space -- confirm against its definition) and converted
		to uppercase.

	Examples
	--------
	>>> selection = GetUserSelection(usage_dict)
	"""
	file_handlers = FileHandlers()
	# ``raw_input`` only exists on Python 2; Python 3 renamed it ``input``.
	try:
		read_line = raw_input
	except NameError:
		read_line = input
	while True:
		selection = read_line("Choose amino acids to remove (multiple amino " +
							"acids are indicated as a comma-separated list: ")
		aa_list = [
			aa.upper() for aa in file_handlers.clean(selection.split(','))
		]
		if all(aa in sorted_dict for aa in aa_list):
			return aa_list
		print("Invalid entry. You must enter a letter or series of " +
		"comma-separated letters corresponding to the amino acids " +
		"you wish to omit.")