def from_counts(cls, alphabet, counts, prior=None):
    """Build a LogoData object from counts.

    Parameters:
        alphabet: Forwarded unchanged to ``cls``.
        counts: Two-dimensional count table; ``counts.shape`` is unpacked
            as ``(seq_length, A)`` and row ``i`` holds the counts for
            position ``i``.
        prior: Optional pseudo-count vector. It is converted to a float64
            array. When it is ``None`` or sums to zero the plain-entropy
            branch is used instead of the Dirichlet branch.

    Returns:
        ``cls(seq_length, alphabet, counts, ent, entropy_interval, weight)``
        where

        * ``ent[i]`` is ``log(A) - entropy(counts[i])`` (0.0 for a position
          whose counts sum to zero) when no prior is in effect, otherwise
          the posterior mean relative entropy against the normalised prior;
        * ``entropy_interval`` is ``None`` without a prior, otherwise a
          ``(seq_length, 2)`` array filled from
          ``interval_relative_entropy(..., 0.95)``;
        * ``weight`` is the per-position count total divided by the largest
          per-position total.
    """
    # NOTE(review): the original author left an open question here about
    # whether `counts` may be a Motif object (needing `counts.array`); the
    # code below requires something exposing a 2-D `.shape`.
    seq_length, A = counts.shape

    if prior is not None:
        prior = array(prior, float64)

    ent = zeros(seq_length, float64)

    if prior is None or sum(prior) == 0.0:
        R = log(A)
        entropy_interval = None
        for i in range(seq_length):
            # FIXME: fixup corebio.moremath.entropy()?
            # Positions with no counts keep the 0.0 that zeros() put there.
            if sum(counts[i]) != 0:
                ent[i] = R - entropy(counts[i])
    else:
        entropy_interval = zeros((seq_length, 2), float64)
        # The normalised prior does not depend on the position, so compute
        # it once instead of twice per iteration.
        background = prior / sum(prior)
        for i in range(seq_length):
            alpha = array(counts[i], float64)
            alpha += prior
            posterior = Dirichlet(alpha)
            ent[i] = posterior.mean_relative_entropy(background)
            low, high = posterior.interval_relative_entropy(background, 0.95)
            entropy_interval[i][0] = low
            entropy_interval[i][1] = high

    weight = array(na.sum(counts, axis=1), float)
    max_weight = max(weight)
    # Guard the normalisation: if every position is empty the maximum is
    # zero and dividing would turn all weights into NaN. Leave them at 0.
    if max_weight != 0:
        weight /= max_weight

    return cls(seq_length, alphabet, counts, ent, entropy_interval, weight)
def from_counts(cls, alphabet, counts, stats_func=None, prior=None,
                composition=None, ngdata=None, pvalue=None):
    """Build a LogoData object from counts.

    Parameters:
        alphabet: Forwarded to ``cls``. When it equals
            ``codon_dna_alphabet`` or ``codon_rna_alphabet`` the dimensions
            are taken from ``len(counts)`` and ``len(alphabet)`` instead of
            ``counts.shape``.
        counts: Per-position count table; row ``i`` holds the counts for
            position ``i``.
        stats_func: Optional callable invoked per position as
            ``stats_func(posterior.alpha, reference)``. It is only used
            when a prior is in effect and ``pvalue`` is ``None``.
        prior: Optional pseudo-count vector, converted to a float64 array.
            When it is ``None`` or sums to zero the plain-entropy branch is
            used.
        composition: Passed to ``posterior.odds_ratio`` and, when no
            ``ngdata`` is given, used as the reference for ``stats_func``.
            Forwarded to ``cls``.
        ngdata: Optional object exposing ``counts`` and ``prior``. When
            given, the reference for ``stats_func`` at position ``i`` is
            the normalised ``ngdata.counts[i] + ngdata.prior``.
        pvalue: Optional precomputed values. In the Dirichlet branch they
            are converted to a float64 array; in the plain-entropy branch
            the argument is forwarded untouched.

    Returns:
        ``cls(seq_length, alphabet, counts, ent, entropy_interval, weight,
        pvalue, composition, odds, max_value, prior)``.

    Raises:
        ValueError: if ``ngdata.counts.shape[0]`` differs from the number
            of positions in ``counts``.
    """
    # NOTE(review): the original author left an open question here about
    # whether `counts` may be a Motif object (needing `counts.array`).
    if alphabet in [codon_dna_alphabet, codon_rna_alphabet]:
        seq_length, A = len(counts), len(alphabet)
    else:
        seq_length, A = counts.shape

    if prior is not None:
        prior = array(prior, float64)

    if ngdata is not None and ngdata.counts.shape[0] != seq_length:
        raise ValueError("Sequence length in negative dataset should be the same as the input's.")

    ent = zeros(seq_length, float64)

    if prior is None or sum(prior) == 0.0:
        R = log(A)
        odds = None
        entropy_interval = None
        max_value = 0.0
        for i in range(seq_length):
            # Positions with no counts keep the 0.0 that zeros() put there.
            if sum(counts[i]) != 0:
                ent[i] = R - entropy(counts[i])
            # Track the largest entropy seen so far (never below 0.0).
            if max_value < ent[i]:
                max_value = ent[i]
    else:
        odds = []
        entropy_interval = zeros((seq_length, 2), float64)
        max_value_ent = 0.0
        max_value_int = 0.0

        # Values are computed here only when the caller supplied a
        # stats_func and no precomputed pvalue.
        # NOTE(review): when both pvalue and stats_func are None this still
        # evaluates array(None, float64); confirm downstream expects that
        # rather than None.
        pvalue_calc = pvalue is None and stats_func is not None
        if pvalue_calc:
            pvalue = []
        else:
            pvalue = array(pvalue, float64)

        # The normalised prior does not depend on the position, so compute
        # it once instead of twice per iteration.
        background = prior / sum(prior)

        for i in range(seq_length):
            alpha = array(counts[i], float64)
            alpha += prior
            posterior = Dirichlet(alpha)
            ent[i] = posterior.mean_relative_entropy(background)
            odds.append(posterior.odds_ratio(composition))
            low, high = posterior.interval_relative_entropy(background, 0.95)
            entropy_interval[i][0] = low
            entropy_interval[i][1] = high

            if pvalue_calc:
                if ngdata is not None:
                    # Reference composition from the negative data set at
                    # the same position.
                    ngalpha = array(ngdata.counts[i], float64)
                    ngalpha += ngdata.prior
                    ngposterior = Dirichlet(ngalpha)
                    ngcompos = ngposterior.alpha / sum(ngposterior.alpha)
                    pvalue.append(stats_func(posterior.alpha, ngcompos))
                else:
                    pvalue.append(stats_func(posterior.alpha, composition))

            if max_value_ent < ent[i]:
                max_value_ent = ent[i]
            if max_value_int < high:
                max_value_int = entropy_interval[i][1]

        # Sum of the largest mean entropy and the largest upper interval
        # bound over all positions.
        max_value = max_value_ent + max_value_int

    weight = array(na.sum(counts, axis=1), float)
    max_weight = max(weight)
    # Guard the normalisation: if every position is empty the maximum is
    # zero and dividing would turn all weights into NaN. Leave them at 0.
    if max_weight != 0:
        weight /= max_weight

    return cls(seq_length, alphabet, counts, ent, entropy_interval, weight,
               pvalue, composition, odds, max_value, prior)