Exemplo n.º 1
0
	def calculateProteinRatioSignificance(self, num_nearest_proteins, ratio_field="ratio_hl_normalized", abundance_field="intensity"):
		"""Score each protein's ratio against proteins of similar abundance.

		Records in ``self.protein_data`` whose ``ratio_field`` is not NA are
		sorted by ``abundance_field``. For every record, the log-ratios of a
		window of neighbouring records in that ordering are summarised with
		``stats.StatsSummary``; the record's own log-ratio is turned into a
		z-score against the window mean and standard deviation, and
		``stats.Prob_Z(z)`` is stored on the record as ``significance``.

		Args:
			num_nearest_proteins: Target number of neighbouring records per window.
			ratio_field: Name of the record attribute holding the ratio.
			abundance_field: Name of the record attribute used for ordering.

		Raises:
			ValueError: If a ratio is not positive (``math.log`` domain error).
			ZeroDivisionError: If a window's standard deviation is zero.
		"""
		# Limit significance calculations to proteins with ratios
		recs_with_ratios = [r for r in self.protein_data.values() if not na.isNA(getattr(r, ratio_field))]
		# Sort proteins by estimated abundance
		recs = sorted(recs_with_ratios, key=lambda x: getattr(x, abundance_field))
		# Take each logarithm once instead of once per window it appears in.
		log_ratios = [math.log(getattr(r, ratio_field)) for r in recs]
		n_recs = len(recs)
		# The requested window size is kept in its own name; the summary's count must not overwrite it.
		window = int(min(num_nearest_proteins, n_recs))
		half_n = int(math.ceil(num_nearest_proteins/2.0))
		for ti, my_log_ratio in enumerate(log_ratios):
			# fetch nearest N proteins by intensity
			if ti < half_n:
				# Too close to the low-abundance end to centre the window: use the first N.
				beg_i = 0
				end_i = window
			elif ti + half_n >= n_recs:
				# Too close to the high-abundance end: use the last N, mirroring the head case.
				beg_i = n_recs - window
				end_i = n_recs
			else:
				beg_i = ti - half_n
				end_i = ti + half_n
			(_count, m, sd, _se) = stats.StatsSummary(log_ratios[beg_i:end_i])
			z = (my_log_ratio - m)/sd
			recs[ti].significance = stats.Prob_Z(z)
Exemplo n.º 2
0
Arquivo: util.py Projeto: dad/base
	def inferHandlers(self, max_lines=100):
		"""Infer a parsing handler for each field by scanning the leading lines.

		Starting at line 0 of ``self.cache``, every non-comment, non-blank line
		is split on ``self.delim``. A field without a handler gets one from
		``self.inferHandlerKey`` the first time a non-NA value is seen; a field
		that already has a handler is re-inferred when that handler raises
		``ValueError`` on the new value. Scanning stops once every field has a
		handler, after ``max_lines`` lines, at end of input, or when
		``self.isValid()`` turns false. Any field still without a handler then
		falls back to the string handler ``self.handler_dict["s"]``.

		Args:
			max_lines: Maximum number of lines to look ahead.

		Returns:
			One handler key per field, joined into a string; fields that were
			never inferred are reported as ``'X'``. The string is empty when no
			data line was seen, in which case ``self.handlers`` stays ``None``.

		Raises:
			AssertionError: If a data line has a different number of fields
				than the first data line.
		"""
		# DAD: run through fields until we've seen at least one non-NA for each.
		handlers_identified = False
		li = 0
		self.cur_line = self.cache.getLine(li)
		self.handlers = None
		inferred_string = []
		while not handlers_identified and li < max_lines and self.isValid():
			if not self.isComment() and not self.isBlank():
				# Not a comment line -- parse it.
				if self.strip:
					self.cur_line = self.cur_line.strip()
				flds = self.cur_line.split(self.delim)
				flds[-1] = flds[-1].strip() # Get rid of \n
				# Initialize empty handler list if we haven't done so already
				if self.handlers is None:
					self.handlers = [None]*len(flds)
					inferred_string = ['X']*len(flds)
				assert len(flds) == len(self.handlers), "Number of fields {} not equal to number of handlers {}".format(len(flds), len(self.handlers))
				for hi in range(len(self.handlers)):
					fld = flds[hi]
					current = self.handlers[hi]
					if current is None:
						# Nothing known yet: an NA value carries no type information.
						needs_inference = not na.isNA(fld)
					else:
						# Handler already found; confirm it still parses, upgrade if not.
						try:
							current(fld)
							needs_inference = False
						except ValueError:
							needs_inference = True
					if needs_inference:
						handler_key = self.inferHandlerKey(fld)
						inferred_string[hi] = handler_key
						self.handlers[hi] = self.handler_dict[handler_key]

				# We're finished when all handlers are not None.
				handlers_identified = all(h is not None for h in self.handlers)
			if not handlers_identified:
				li += 1
				try:
					self.cur_line = self.cache.getLine(li)
				except ReaderEOFError:
					# End of file with an inconclusive result: stop scanning and let the
					# fallback below treat the remaining fields as strings.
					handlers_identified = True
		# Whatever ended the scan (line limit, end of input, invalid reader), default every
		# still-unknown field to the string handler. Skipped when no data line was seen,
		# because then there is no field list to fill in.
		if self.handlers is not None:
			for hi in range(len(self.handlers)):
				if self.handlers[hi] is None:
					self.handlers[hi] = self.handler_dict["s"]
		return ''.join(inferred_string)
Exemplo n.º 3
0
def naIntParser(x):
    """Parse ``x`` as an int, mapping NA values to ``None``.

    A ``ValueError`` from ``int`` is re-raised unless ``na.isNA(x)`` is true.
    """
    try:
        return int(x)
    except ValueError:
        if na.isNA(x):
            return None
        raise
Exemplo n.º 4
0
def naSciParser(x):
    """Parse ``x`` as a float, mapping NA values to ``None``.

    A ``ValueError`` from ``float`` is re-raised unless ``na.isNA(x)`` is true.
    """
    try:
        parsed = float(x)
    except ValueError:
        if not na.isNA(x):
            raise
        parsed = None
    return parsed
Exemplo n.º 5
0
Arquivo: util.py Projeto: dad/base
def looseIntParser(x):
    """Parse ``x`` as an int, falling back to ``naFloatParser`` on failure.

    Returns ``None`` when ``int`` rejects ``x`` and ``na.isNA(x)`` is true.
    """
    try:
        return int(x)
    except ValueError:
        # Not an integer literal: either an NA marker or something for the float parser.
        return None if na.isNA(x) else naFloatParser(x)
Exemplo n.º 6
0
Arquivo: util.py Projeto: dad/base
def naFloatParser(x):
    """Return ``float(x)``, or ``None`` when ``x`` is an NA value.

    Any ``ValueError`` for a non-NA input propagates to the caller.
    """
    try:
        return float(x)
    except ValueError:
        if na.isNA(x):
            return None
        raise
Exemplo n.º 7
0
def looseIntParser(x):
    """Return ``int(x)`` when possible; otherwise defer to ``naFloatParser``.

    NA inputs (per ``na.isNA``) that ``int`` rejects yield ``None``.
    """
    try:
        parsed = int(x)
    except ValueError:
        if na.isNA(x):
            parsed = None
        else:
            parsed = naFloatParser(x)
    return parsed
Exemplo n.º 8
0
	def add(self, x):
		"""Fold ``x`` into the running totals, or count it as NA.

		Non-NA values update the sum, the sum of squares and the count, and
		are appended to ``self._data`` when ``self._store`` is set.
		"""
		if na.isNA(x):
			self._na += 1
			return
		self._sum += x
		self._sum_sq += x*x
		self._n += 1
		if self._store:
			self._data.append(x)
Exemplo n.º 9
0
 def __str__(self):
     """Format the transformed value, or return ``na.NA`` for an NA value.

     NOTE(review): when the transform or the format raises ``ValueError`` or
     ``TypeError`` this returns ``None``, which makes ``str(obj)`` raise
     ``TypeError``; confirm whether callers rely on that.
     """
     if na.isNA(self.var):
         return na.NA
     try:
         return self.format.format(self.transform(self.var))
     except (ValueError, TypeError):
         return None
Exemplo n.º 10
0
	def add(self, x):
		"""Fold ``x`` into the running totals and extremes, or count it as NA.

		Non-NA values update the sum, sum of squares, count, minimum and
		maximum, and are appended to ``self._data`` when ``self._store`` is set.
		"""
		if na.isNA(x):
			self._na += 1
			return
		self._sum += x
		self._sum_sq += x*x
		self._n += 1
		# Track the extremes seen so far.
		if self._min > x:
			self._min = x
		if self._max < x:
			self._max = x
		if self._store:
			self._data.append(x)
Exemplo n.º 11
0
Arquivo: util.py Projeto: dad/base
 def __str__(self):
     """Render ``self.var`` through ``self.transform`` and ``self.format``.

     NA values render as ``na.NA``. NOTE(review): a ``ValueError`` or
     ``TypeError`` during rendering is swallowed and ``None`` is returned,
     which ``str()`` rejects with ``TypeError``; confirm this is intended.
     """
     if na.isNA(self.var):
         text = na.NA
     else:
         text = None
         try:
             text = self.format.format(self.transform(self.var))
         except (ValueError, TypeError):
             # Leave text as None when the value cannot be rendered.
             pass
     return text
Exemplo n.º 12
0
Arquivo: util.py Projeto: dad/base
def naStringParser(x):
    """Return ``str(x)``, or ``None`` when ``na.isNA(x)`` is true."""
    return None if na.isNA(x) else str(x)
Exemplo n.º 13
0
	def normalizeHeavyIntensity(self, weight):
		"""Replace ``intensity_h_list`` with its non-NA entries divided by ``weight``.

		NA entries are dropped, so the list may become shorter.
		"""
		scaled = []
		for value in self.intensity_h_list:
			if na.isNA(value):
				continue
			scaled.append(value/weight)
		self.intensity_h_list = scaled
Exemplo n.º 14
0
	def normalized_ratio_hm(self):
		"""Return ``exp`` of the normalized heavy/medium summary median, or ``None`` if it is NA."""
		summary_median = self.getNormalizedHeavyMediumRatioSummary().median
		if na.isNA(summary_median):
			return None
		return math.exp(summary_median)
Exemplo n.º 15
0
	def normalizeMediumIntensity(self, weight):
		"""Replace ``intensity_m_list`` with its non-NA entries divided by ``weight``.

		NA entries are dropped, so the list may become shorter.
		"""
		self.intensity_m_list = list(
			value/weight
			for value in self.intensity_m_list
			if not na.isNA(value)
		)
Exemplo n.º 16
0
			# Leading columns: identifier, comma-joined protein IDs (or the literal NA), protein count.
			# NOTE(review): the empty branch formats pep.key while the other formats p -- confirm both name the same identifier.
			if prot_ids == []:
				line = '{0}\tNA\t0'.format(pep.key)
			else:
				line = '{0}\t{1}\t{2}'.format(p, ",".join(prot_ids), len(prot_ids))

			# One FieldFormatter per output column, appended in column order.
			output_fields = []
			# The same set of 13 columns is emitted for each of the three ratio types.
			for rat in ['hl','ml','hm']:
				ratio_stats = pep.getRatioSummary(rat)
				ratio_norm_stats = pep.getNormalizedRatioSummary(rat)
				output_fields.append(util.FieldFormatter(ratio_stats.median,"{0:e}"))
				output_fields.append(util.FieldFormatter(ratio_stats.mean,"{0:e}"))
				output_fields.append(util.FieldFormatter(ratio_norm_stats.median,"{0:e}"))
				output_fields.append(util.FieldFormatter(ratio_norm_stats.mean,"{0:e}"))
				# Bounds stay None when no standard error is available.
				rn_lower_95 = None
				rn_upper_95 = None
				if not na.isNA(ratio_norm_stats.se):
					# Bounds are exp(log(mean) -/+ 1.96*se).
					# NOTE(review): presumably se is on the log scale -- confirm. math.log raises ValueError if mean <= 0.
					rn_lower_95 = math.exp(math.log(ratio_norm_stats.mean)-1.96*ratio_norm_stats.se)
					rn_upper_95 = math.exp(math.log(ratio_norm_stats.mean)+1.96*ratio_norm_stats.se)
				output_fields.append(util.FieldFormatter(rn_lower_95,"{0:e}"))
				output_fields.append(util.FieldFormatter(rn_upper_95,"{0:e}"))
				output_fields.append(util.FieldFormatter(ratio_stats.n,"{0:d}"))
				output_fields.append(util.FieldFormatter(ratio_stats.sd,"{0:e}"))
				output_fields.append(util.FieldFormatter(ratio_norm_stats.sd,"{0:e}"))
				# Intensity ratios -- no "normalized" ratios here.
				iratio_stats = pep.getIntensityRatioSummary(rat)
				output_fields.append(util.FieldFormatter(iratio_stats.median,"{0:e}"))
				output_fields.append(util.FieldFormatter(iratio_stats.mean,"{0:e}"))
				output_fields.append(util.FieldFormatter(iratio_stats.n,"{0:d}"))
				output_fields.append(util.FieldFormatter(iratio_stats.sd,"{0:e}"))

			# Final column: the peptide's overall intensity.
			output_fields.append(util.FieldFormatter(pep.intensity,"{0:e}"))
Exemplo n.º 17
0
                                                  (len(pep.sequence) + pos))
         outs.write(">{}\n{}\n".format(pepid, line))
 elif options.output_type == 'ratio':
     outs.write(
         "seq\tmod.seq\tbegin\tend\tratio\tint.ratio\tintensity\tintensity.h\tintensity.l\n"
     )
     n_written = 0
     # Write one tab-separated row per heavy/light ratio of every peptide.
     for (pos, pep) in pep_list:
         #ratio_stats = pep.getHeavyLightRatioSummary()
         # The intensity lists are indexed in parallel with the ratio list.
         # NOTE(review): this assumes all three lists have the same length -- confirm.
         for (ri, ratio) in enumerate(pep.heavy_light_ratio_list):
             inth = pep.intensity_h_list[ri]
             intl = pep.intensity_l_list[ri]
             # Summed intensity; stays None unless both intensities are present.
             inten = None
             # Ratio of intensities
             int_ratio = None
             if not (na.isNA(inth) or na.isNA(intl)):
                 inten = inth + intl
                 # Leave the ratio as None rather than divide by a non-positive light intensity.
                 if intl > 0:
                     int_ratio = inth / float(intl)
             # begin/end are written 1-based and inclusive (pos is 0-based).
             outs.write(
                 "{seq}\t{modseq}\t{begin}\t{end}\t{ratio}\t{intratio}\t{inten}\t{inth}\t{intl}\n"
                 .format(seq=pep.sequence,
                         modseq=pep.modified_sequence,
                         begin=pos + 1,
                         end=pos + len(pep.sequence),
                         ratio=na.formatNA(ratio),
                         intratio=na.formatNA(int_ratio),
                         inten=na.formatNA(inten),
                         inth=na.formatNA(inth),
                         intl=na.formatNA(intl)))
             n_written += 1
Exemplo n.º 18
0
	def ratio(self):
		"""Return ``exp`` of the heavy/light summary median, or ``None`` if it is NA."""
		summary_median = self.getHeavyLightRatioSummary().median
		return None if na.isNA(summary_median) else math.exp(summary_median)
Exemplo n.º 19
0
	def normalizeRatiosBy(self, ratio, norm_ratio):
		"""Rescale the stored heavy/light ratio lists, dropping NA entries.

		``heavy_light_ratio_list`` is divided by ``ratio`` and
		``heavy_light_normalized_ratio_list`` by ``norm_ratio``.
		"""
		raw_scaled = []
		for value in self.heavy_light_ratio_list:
			if not na.isNA(value):
				raw_scaled.append(value/ratio)
		self.heavy_light_ratio_list = raw_scaled

		norm_scaled = []
		for value in self.heavy_light_normalized_ratio_list:
			if not na.isNA(value):
				norm_scaled.append(value/norm_ratio)
		self.heavy_light_normalized_ratio_list = norm_scaled
Exemplo n.º 20
0
	# Get directory of guide file
	# NOTE(review): os.path.dirname returns '' when fname has no directory part, and os.chdir('') fails -- confirm fname always includes a directory.
	path = os.path.dirname(fname)
	# NOTE(review): curwd is saved but not used in the visible code -- confirm the working directory is restored later.
	curwd = os.getcwd()

	# Species whose classification tree was merged successfully.
	species_names = []
	with open(fname,'r') as inf:
		# Change into the guide file's directory before opening the per-species files it lists.
		os.chdir(path)
		tab = util.readTable(inf, header=True)
		rows = tab.dictrows
		if options.debug:
			# Debug mode: only process the first two rows.
			rows = [x for x in tab.dictrows][:2]
		# True until the first tree has been merged; passed to mergeTrees as add_to_leaf.
		just_started = True
		for row in rows:
			spec_fname = row['filename']
			#print(spec_fname)
			if not na.isNA(spec_fname):
				# NOTE(review): this file handle is never explicitly closed in the visible code.
				spec_inf = util.readTable(open(spec_fname,'r'), header=True)
				twig = phyloutil.treeFromClassificationTable(spec_inf)
				added = phyloutil.mergeTrees(tree_root, twig, add_to_leaf=just_started)
				if added:
					just_started = False
					species_names.append(row['updated.species'])
					#print(spec_fname)
				else:
					# Record the skipped file in the informational output.
					info_outs.write("# Didn't add {}\n".format(spec_fname))
				#phyloutil.printTree(tree_root)
	# Testing
	# Write tree
	# Read it back in
	# Extract leaf species
	# Check to make sure they're all the ones we expect
Exemplo n.º 21
0
def isNA(x):
    """Deprecated alias for ``na.isNA``.

    Writes a migration warning to stderr on every call, then returns
    ``na.isNA(x)``.
    """
    # End the warning with a newline so repeated calls do not run together on one line.
    sys.stderr.write("util.isNA() should be updated to na.isNA()\n")
    return na.isNA(x)
Exemplo n.º 22
0
	def add(self, x):
		"""Accumulate ``log(x)`` for positive, non-NA ``x``; count anything else as NA.

		The untransformed value is also added to ``self._nolog_sum``.
		"""
		if na.isNA(x) or not (x>0.0):
			self._na += 1
			return
		super(LogAccumulator,self).add(math.log(x))
		self._nolog_sum += x
Exemplo n.º 23
0
Arquivo: util.py Projeto: dad/base
def isNA(x):
	"""Deprecated alias for ``na.isNA``.

	Writes a migration warning to stderr on every call, then returns
	``na.isNA(x)``.
	"""
	# End the warning with a newline so repeated calls do not run together on one line.
	sys.stderr.write("util.isNA() should be updated to na.isNA()\n")
	return na.isNA(x)
Exemplo n.º 24
0
Arquivo: util.py Projeto: dad/base
def isNA(x):
    """Deprecated alias for ``na.isNA``; prints a migration warning to stderr."""
    print("util.isNA() should be updated to na.isNA()", file=sys.stderr)
    result = na.isNA(x)
    return result
Exemplo n.º 25
0
	def normalized_ratio_ml(self):
		"""Return ``exp`` of the normalized medium/light summary median, or ``None`` if it is NA."""
		summary_median = self.getNormalizedMediumLightRatioSummary().median
		if na.isNA(summary_median):
			return None
		return math.exp(summary_median)
Exemplo n.º 26
0
            # One FieldFormatter per output column, appended in column order.
            output_fields = []
            # The same set of columns is emitted for each of the three ratio types.
            for rat in ['hl', 'ml', 'hm']:
                ratio_stats = pep.getRatioSummary(rat)
                ratio_norm_stats = pep.getNormalizedRatioSummary(rat)
                output_fields.append(
                    util.FieldFormatter(ratio_stats.median, "{0:e}"))
                output_fields.append(
                    util.FieldFormatter(ratio_stats.mean, "{0:e}"))
                output_fields.append(
                    util.FieldFormatter(ratio_norm_stats.median, "{0:e}"))
                output_fields.append(
                    util.FieldFormatter(ratio_norm_stats.mean, "{0:e}"))
                # Bounds stay None when no standard error is available.
                rn_lower_95 = None
                rn_upper_95 = None
                if not na.isNA(ratio_norm_stats.se):
                    # Bounds are exp(log(mean) -/+ 1.96*se).
                    # NOTE(review): presumably se is on the log scale -- confirm. math.log raises ValueError if mean <= 0.
                    rn_lower_95 = math.exp(
                        math.log(ratio_norm_stats.mean) -
                        1.96 * ratio_norm_stats.se)
                    rn_upper_95 = math.exp(
                        math.log(ratio_norm_stats.mean) +
                        1.96 * ratio_norm_stats.se)
                output_fields.append(util.FieldFormatter(rn_lower_95, "{0:e}"))
                output_fields.append(util.FieldFormatter(rn_upper_95, "{0:e}"))
                output_fields.append(
                    util.FieldFormatter(ratio_stats.n, "{0:d}"))
                output_fields.append(
                    util.FieldFormatter(ratio_stats.sd, "{0:e}"))
                output_fields.append(
                    util.FieldFormatter(ratio_norm_stats.sd, "{0:e}"))
                # Intensity ratios -- no "normalized" ratios here.
Exemplo n.º 27
0
def naStringParser(x):
    """Convert ``x`` to ``str`` unless it is an NA value, which yields ``None``."""
    if na.isNA(x):
        return None
    return str(x)
Exemplo n.º 28
0
	# Remaining command-line options, then parse.
	parser.add_argument("--mw", dest="do_mw", default=False, action="store_true", help="compute molecular weights?")
	parser.add_argument("--target-aas", dest="target_aas", type=str, default=translate.AAs(), help="amino acids (e.g. ACDEF) for frequency analysis")
	parser.add_argument("-p", "--pseudo", dest="pseudocount", type=float, default=0.0, help="pseudocount to add to all frequencies")
	parser.add_argument("-o", "--out", dest="out_fname", type=str, default=None, help="output filename")
	options = parser.parse_args()

	# Load coding and protein sequences from FASTA files; "~" in the paths is expanded.
	cdna_dict = biofile.readFASTADict(os.path.expanduser(options.cds_in_fname))
	prot_dict = biofile.readFASTADict(os.path.expanduser(options.prot_in_fname))

	# Read paralog data from Yeast Gene Order Browser file
	# NOTE(review): file() is a Python 2 builtin that does not exist in Python 3; the handle is also never explicitly closed here.
	ygob_data = util.readTable(file(os.path.expanduser(options.paralog_fname),'r'))
	# Symmetric lookup: each member of a paralog pair maps to the other.
	paralog_dict = {}
	for flds in ygob_data.dictrows:
		scer1 = flds['scer1'].strip()
		scer2 = flds['scer2'].strip()
		# Only record pairs where both entries are present.
		if not (na.isNA(scer1) or na.isNA(scer2)):
			paralog_dict[scer1] = scer2
			paralog_dict[scer2] = scer1

	# Read SGD data
	# NOTE(review): same file() concern as above.
	sgd_features = util.readTable(file(os.path.expanduser(options.feature_fname),'r'), header=False)
	'''
	http://downloads.yeastgenome.org/curation/chromosomal_feature/SGD_features.README
	1.   Primary SGDID (mandatory)
	2.   Feature type (mandatory)
	3.   Feature qualifier (optional)
	4.   Feature name (optional)
	5.   Standard gene name (optional)
	6.   Alias (optional, multiples separated by |)
	7.   Parent feature name (optional)
	8.   Secondary SGDID (optional, multiples separated by |)
Exemplo n.º 29
0
    def inferHandlers(self, max_lines=100):
        """Infer a parsing handler for each field by scanning the leading lines.

        Starting at line 0 of ``self.cache``, every non-comment, non-blank line
        is split on ``self.delim``. A field without a handler gets one from
        ``self.inferHandlerKey`` the first time a non-NA value is seen; a field
        that already has a handler is re-inferred when that handler raises
        ``ValueError`` on the new value. Scanning stops once every field has a
        handler, after ``max_lines`` lines, at end of input, or when
        ``self.isValid()`` turns false. Any field still without a handler then
        falls back to the string handler ``self.handler_dict["s"]``.

        Args:
            max_lines: Maximum number of lines to look ahead.

        Returns:
            One handler key per field, joined into a string; fields that were
            never inferred are reported as ``'X'``. The string is empty when no
            data line was seen, in which case ``self.handlers`` stays ``None``.

        Raises:
            AssertionError: If a data line has a different number of fields
                than the first data line.
        """
        # DAD: run through fields until we've seen at least one non-NA for each.
        handlers_identified = False
        li = 0
        self.cur_line = self.cache.getLine(li)
        self.handlers = None
        inferred_string = []
        while not handlers_identified and li < max_lines and self.isValid():
            if not self.isComment() and not self.isBlank():
                # Not a comment line -- parse it.
                if self.strip:
                    self.cur_line = self.cur_line.strip()
                flds = self.cur_line.split(self.delim)
                flds[-1] = flds[-1].strip()  # Get rid of \n
                # Initialize empty handler list if we haven't done so already
                if self.handlers is None:
                    self.handlers = [None] * len(flds)
                    inferred_string = ['X'] * len(flds)
                assert len(flds) == len(
                    self.handlers
                ), "Number of fields {} not equal to number of handlers {}".format(
                    len(flds), len(self.handlers))
                for hi in range(len(self.handlers)):
                    fld = flds[hi]
                    current = self.handlers[hi]
                    if current is None:
                        # Nothing known yet: an NA value carries no type information.
                        needs_inference = not na.isNA(fld)
                    else:
                        # Handler already found; confirm it still parses, upgrade if not.
                        try:
                            current(fld)
                            needs_inference = False
                        except ValueError:
                            needs_inference = True
                    if needs_inference:
                        handler_key = self.inferHandlerKey(fld)
                        inferred_string[hi] = handler_key
                        self.handlers[hi] = self.handler_dict[handler_key]

                # We're finished when all handlers are not None.
                handlers_identified = all(
                    h is not None for h in self.handlers)
            if not handlers_identified:
                li += 1
                try:
                    self.cur_line = self.cache.getLine(li)
                except ReaderEOFError:
                    # End of file with an inconclusive result: stop scanning and let the
                    # fallback below treat the remaining fields as strings.
                    handlers_identified = True
        # Whatever ended the scan (line limit, end of input, invalid reader), default every
        # still-unknown field to the string handler. Skipped when no data line was seen,
        # because then there is no field list to fill in.
        if self.handlers is not None:
            for hi in range(len(self.handlers)):
                if self.handlers[hi] is None:
                    self.handlers[hi] = self.handler_dict["s"]
        return ''.join(inferred_string)
Exemplo n.º 30
0
    # Create mapping
    # Maps each original species name to its updated name (columns 'species' -> 'updated.species').
    mapping_dict = dict(zip(map_table['species'],
                            map_table['updated.species']))

    # Update the FASTA headers
    #new_headers = []
    #new_seqs = []
    # All three dictionaries are keyed by the updated species name.
    # NOTE(review): a later header with the same updated name overwrites the earlier entry -- confirm that is intended.
    seq_dict = {}
    header_dict = {}
    short_species_names = {}
    for (i, h) in enumerate(headers):
        species_name = extractSpeciesName(h)
        short_name = makeShortSpeciesName(species_name)
        try:
            updated_species_name = mapping_dict[species_name]
            # Species whose updated name is NA are skipped.
            if not na.isNA(updated_species_name):
                # Rebuild the header with the updated name between the brackets.
                # NOTE(review): h.split(']')[1] raises IndexError for a header without ']', which the KeyError handler below does not catch.
                new_header = "{}[{}]{}".format(
                    h.split('[')[0], updated_species_name,
                    h.split(']')[1])
                #new_headers.append(new_header)
                #new_seqs.append(seqs[i])
                seq_dict[updated_species_name] = seqs[i]
                header_dict[updated_species_name] = new_header
                short_species_names[updated_species_name] = short_name
        except KeyError as ke:
            # Species missing from the mapping table: print the key and carry on.
            print(ke)

    # Iterate over tree and write out FASTA in tree-sorted order
    n_written = 0
    sorted_headers = []
    sorted_seqs = []
Exemplo n.º 31
0
	def add(self, x):
		"""Add ``log(x)`` to the parent accumulator when ``x`` is a positive, non-NA value.

		Such values are also summed unlogged in ``self._nolog_sum``; every
		other input increments the NA counter.
		"""
		usable = not na.isNA(x) and x>0.0
		if not usable:
			self._na += 1
			return
		super(LogAccumulator,self).add(math.log(x))
		self._nolog_sum += x
Exemplo n.º 32
0
Arquivo: mspep.py Projeto: dad/base
		for (pos, pep) in pep_list:
			n_peps += 1
			pepid = "{}-{}".format(options.target_orf, n_peps)
			line = gap*pos + pep.sequence + gap*(len_prot-(len(pep.sequence)+pos))
			outs.write(">{}\n{}\n".format(pepid, line))
	elif options.output_type == 'ratio':
		outs.write("seq\tmod.seq\tbegin\tend\tratio\tint.ratio\tintensity\tintensity.h\tintensity.l\n")
		n_written = 0
		# Write one tab-separated row per heavy/light ratio of every peptide.
		for (pos, pep) in pep_list:
			#ratio_stats = pep.getHeavyLightRatioSummary()
			# The intensity lists are indexed in parallel with the ratio list.
			# NOTE(review): this assumes all three lists have the same length -- confirm.
			for (ri,ratio) in enumerate(pep.heavy_light_ratio_list):
				inth = pep.intensity_h_list[ri]
				intl = pep.intensity_l_list[ri]
				# Summed intensity; stays None unless both intensities are present.
				inten = None
				# Ratio of intensities
				int_ratio = None
				if not (na.isNA(inth) or na.isNA(intl)):
					inten = inth+intl
					# Leave the ratio as None rather than divide by a non-positive light intensity.
					if intl>0:
						int_ratio = inth/float(intl)
				# begin/end are written 1-based and inclusive (pos is 0-based).
				outs.write("{seq}\t{modseq}\t{begin}\t{end}\t{ratio}\t{intratio}\t{inten}\t{inth}\t{intl}\n".format(
					seq=pep.sequence, modseq=pep.modified_sequence, begin=pos+1, end=pos+len(pep.sequence), 
					ratio=na.formatNA(ratio), intratio=na.formatNA(int_ratio), inten=na.formatNA(inten), inth=na.formatNA(inth), intl=na.formatNA(intl)))
				n_written += 1
		info_outs.write("# Wrote {} peptide records\n".format(n_written))
			#outs.write("{seq}\t{begin}\t{end}\t{ratio}\t{ratio_n}\n".format(
			#	seq=pep.sequence, begin=pos+1, end=pos+len(pep.sequence), ratio=na.formatNA(ratio_stats.median), ratio_n=na.formatNA(ratio_stats.n)))
		
			
	
Exemplo n.º 33
0
                        default=None,
                        help="output filename")
    options = parser.parse_args()

    # Load coding and protein sequences from FASTA files; "~" in the paths is expanded.
    cdna_dict = biofile.readFASTADict(os.path.expanduser(options.cds_in_fname))
    prot_dict = biofile.readFASTADict(os.path.expanduser(
        options.prot_in_fname))

    # Read paralog data from Yeast Gene Order Browser file
    # NOTE(review): file() is a Python 2 builtin that does not exist in Python 3; the handle is also never explicitly closed here.
    ygob_data = util.readTable(
        file(os.path.expanduser(options.paralog_fname), 'r'))
    # Symmetric lookup: each member of a paralog pair maps to the other.
    paralog_dict = {}
    for flds in ygob_data.dictrows:
        scer1 = flds['scer1'].strip()
        scer2 = flds['scer2'].strip()
        # Only record pairs where both entries are present.
        if not (na.isNA(scer1) or na.isNA(scer2)):
            paralog_dict[scer1] = scer2
            paralog_dict[scer2] = scer1

    # Read SGD data
    # NOTE(review): same file() concern as above.
    sgd_features = util.readTable(file(
        os.path.expanduser(options.feature_fname), 'r'),
                                  header=False)
    '''
	http://downloads.yeastgenome.org/curation/chromosomal_feature/SGD_features.README
	1.   Primary SGDID (mandatory)
	2.   Feature type (mandatory)
	3.   Feature qualifier (optional)
	4.   Feature name (optional)
	5.   Standard gene name (optional)
	6.   Alias (optional, multiples separated by |)