Example #1
def compute_uncertainties(pred_mc):
    """
    pred_mc is an N x K x C matrix
        N is the number of samples
        K is the number of draws from the posterior weight distribution
        C is the number of classes in the prediction
    Returns: a dictionary containing
        pred:   predictive categorical softmax obtained by integrating over
                draws from the weights, of shape (N, C)
        aleatoric:  aleatoric uncertainty, of shape (N,)
        epistemic:  epistemic uncertainty, of shape (N,)
        predictive:  predictive uncertainty, of shape (N,)
    """

    nb_test = pred_mc.shape[1]  # K, the number of posterior draws
    pred = np.mean(pred_mc, axis=1)
    predictive_uncertainty = -np.sum(pred * ma.log2(pred).filled(0), axis=-1)
    aleatoric_uncertainty = - 1/nb_test \
            * np.sum(pred_mc * ma.log2(pred_mc).filled(0), axis=(1,2))
    epistemic_uncertainty = predictive_uncertainty - aleatoric_uncertainty
    return {
        'pred': pred,
        'predictive': predictive_uncertainty,
        'aleatoric': aleatoric_uncertainty,
        'epistemic': epistemic_uncertainty
    }
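A minimal usage sketch (synthetic data; the inputs are hypothetical): with random softmax draws, the outputs satisfy predictive = aleatoric + epistemic by construction.

import numpy as np
import numpy.ma as ma

rng = np.random.default_rng(0)
logits = rng.normal(size=(100, 20, 3))  # N=100 samples, K=20 draws, C=3 classes
pred_mc = np.exp(logits) / np.exp(logits).sum(axis=-1, keepdims=True)  # softmax per draw
u = compute_uncertainties(pred_mc)
assert u['pred'].shape == (100, 3)
assert np.allclose(u['predictive'], u['aleatoric'] + u['epistemic'])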
Example #2
from math import floor, log2

def genlist(x):
    if x==1:
        return False
    elif (log2(x))%1==0:
        return not genlist(int(x/2))
    else:
        return not genlist(x-2**floor(log2(x)))
Example #3
def compute_parents_kld(P, Q):
    for A in (P, Q):
        assert np.all(A >= 0)
        assert np.allclose(1, np.sum(A, axis=0))
    logP = ma.log2(ma.masked_equal(P, 0))
    logQ = ma.log2(ma.masked_equal(Q, 0))
    kld = np.sum(P * (logP - logQ), axis=0)

    assert np.allclose(0, kld[kld < 0])
    kld = np.abs(kld)
    assert np.all(kld >= 0)
    return kld
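A quick sanity check with hypothetical inputs: each column of P and Q is a distribution over parents, and one KL value (in bits) comes back per column.

import numpy as np
import numpy.ma as ma

P = np.array([[0.5, 1.0],
              [0.5, 0.0]])
Q = np.array([[0.9, 0.8],
              [0.1, 0.2]])
print(compute_parents_kld(P, Q))  # non-negative KL divergence per column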
Example #4
def cal_mavalue(pk, mk1_name, mk2_name):
    """
    Calculate the M and A values for peak `pk` from two read-density tracks.
    """
    if (len(pk.rds_density) >= 2 and mk1_name in pk.rds_density
            and mk2_name in pk.rds_density):
        density1 = pk.rds_density[mk1_name]
        density2 = pk.rds_density[mk2_name]
        mvalue = log2(density1) - log2(density2)
        avalue = (log2(density1) + log2(density2)) / 2
        pk.another_info.update({'mvalue': mvalue})
        pk.another_info.update({'avalue': avalue})
        return mvalue, avalue
Example #5
def calc_cadi(eta, struct):
    '''
  Compute the clone and ancestor diversity index (CADI), which is the joint
  entropy of eta and the subclones ancestral to a clone.

  >>> eta = np.array([[0.5], [0.2], [0.2], [0.1]])
  >>> struct = [0, 1, 1]
  >>> cadi = calc_cadi(eta, struct)
  >>> np.isclose(cadi[0], 2.1219280948873624)
  True
  '''
    K, S = eta.shape

    adj = util.convert_parents_to_adjmatrix(struct)
    anc = util.make_ancestral_from_adj(adj, check_validity=True)
    assert anc.shape == (K, K)
    A = np.sum(anc, axis=0) - 1
    A = np.repeat(A[1:][:, np.newaxis], S, axis=1)
    assert np.all(A >= 1)

    eta = _fix_eta(eta)
    assert A.shape == eta.shape

    H_joint = -ma.sum(eta * (ma.log2(eta) - np.log2(A)), axis=0)
    assert H_joint.shape == (S, )
    return H_joint
Example #6
def calc_cmdi(eta, clusters, struct):
    '''Compute the clone and mutation diversity index (CMDI), which is the joint
  entropy of eta and the mutations present in a clone (i.e., the mutations
  specific to it as well as the mutations inherited from its ancestors).'''
    K, S = eta.shape

    adj = util.convert_parents_to_adjmatrix(struct)
    anc = util.make_ancestral_from_adj(adj, check_validity=True)
    assert anc.shape == (K, K)

    vids, mutmem = util.make_membership_mat(clusters)
    M = len(vids)
    # Root node has no associated mutations.
    mutmem = np.insert(mutmem, 0, 0, axis=1)
    assert mutmem.shape == (M, K)
    assert np.sum(mutmem) == M
    # `mutanc[i,j] = 1` iff mutation `i` occurred in node `j` or a node ancestral
    # to it.
    mutanc = np.dot(mutmem, anc)
    # `mutanc_cnt[i]` = number of mutations that occurred in clone `i` and all
    # clones ancestral to it.
    mutanc_cnt = np.sum(mutanc, axis=0)

    assert mutanc_cnt[0] == 0 and np.all(mutanc_cnt[1:] > 0)
    M_k = np.repeat(mutanc_cnt[1:][:, np.newaxis], S, axis=1)
    eta = _fix_eta(eta)
    assert eta.shape == M_k.shape

    H_joint = -ma.sum(eta * (ma.log2(eta) - np.log2(M_k)), axis=0)
    assert H_joint.shape == (S, )
    return H_joint
Example #7
def joint_entropy(X1, X2, dist1=None, dist2=None):
    '''
    Calculate the joint entropy of two variables X1, and X2
    H(X, Y) = -sum(p(xy)[i] * log2(p(xy)[i]))
    https://en.wikipedia.org/wiki/Joint_entropy
    '''
    if dist1 is None:
        nbins1 = determine_nbins1D(X1)
    else:
        rule1 = 'Sturges'
        if dist1 == 'normal':
            rule1 = 'Scott'
        elif dist1 == 'unknown':
            rule1 = 'Freedman-Diaconis'
        nbins1 = determine_nbins1D(X1, rule1)

    if dist2 is None:
        nbins2 = determine_nbins1D(X2)
    else:
        rule2 = 'Sturges'
        if dist2 == 'normal':
            rule2 = 'Scott'
        elif dist2 == 'unknown':
            rule2 = 'Freedman-Diaconis'
        nbins2 = determine_nbins1D(X2, rule2)

    pxy, _, _ = np.histogram2d(X1, X2, bins=[nbins1, nbins2])
    pxy = pxy / pxy.sum()

    return -np.sum(pxy * ma.log2(pxy).filled(0))
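The binning helper determine_nbins1D is external to this snippet; the core computation can be exercised directly with fixed bin counts (a sketch under that assumption):

import numpy as np
import numpy.ma as ma

rng = np.random.default_rng(1)
X1 = rng.normal(size=1000)
X2 = rng.normal(size=1000)
pxy, _, _ = np.histogram2d(X1, X2, bins=[16, 16])
pxy = pxy / pxy.sum()
print(-np.sum(pxy * ma.log2(pxy).filled(0)))  # joint entropy in bits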
Example #8
    def _compute_first_order_stats(self, Nbin=32):
        """
            compute first-order statistics of the data
            Nbin: the number of discrete intensity levels used to compute the data histogram, default Nbin = 32
        """
        if self._maskImage_ndarray is None:  # no mask defined, so use the entire image
            data = self._inputImage_ndarray.flatten()
        else:
            data = self._inputImage_ndarray[self._maskImage_ndarray]

        #TODO: here is the bottleneck when data is all zeros
        data_stats = dict(ss.describe(data)._asdict())
        data_stats.pop('nobs')  # delete the dict entry of 'nobs'

        # make sure each key is prefixed with 'FOstats_'
        data_stats = {'FOstats_' + k: val for k, val in data_stats.items()}

        self._df_feature_output.update(data_stats)

        # compute histogram-related stats
        # density = True ==> the integral of p_data = 1.0, i.e. np.sum(p_data*np.diff(p_bin)) = 1.0
        p_data, p_bin = np.histogram(data, bins=Nbin, density=True)
        tmp = np.sum(p_data * ma.log2(p_data))
        if tmp is ma.masked:
            print('::Oh NO O_O:: FOstats_entropy is a masked constant!!')
        else:
            self._df_feature_output['FOstats_entropy'] = tmp
        self._df_feature_output['FOstats_energy'] = np.sum(data**2)
        self._df_feature_output['FOstats_uniformity'] = np.sum(p_data**2)

        print('::ImageFeature:: complete compute_first_order_stats!')
Example #9
def compute_parentropy(parent_dist):
    K = len(parent_dist)
    assert parent_dist.shape == (K, K - 1)
    parent_dist = ma.masked_equal(parent_dist, 0)
    parent_entropy = -ma.sum(parent_dist * ma.log2(parent_dist), axis=0)
    total_entropy = np.sum(parent_entropy)
    entropy_per_node = total_entropy / (K - 1)
    return (total_entropy, entropy_per_node, parent_entropy)
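A usage sketch (hypothetical distribution): three candidate parents (rows) for each of two non-root nodes (columns); a deterministic column contributes zero entropy.

import numpy as np
import numpy.ma as ma

parent_dist = np.array([[0.50, 1.0],
                        [0.25, 0.0],
                        [0.25, 0.0]])
total, per_node, per_col = compute_parentropy(parent_dist)
print(total, per_node)  # 1.5 bits total, 0.75 bits per node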
Example #10
def WaveShrink(y, typename, L, qmf):
    n = len(y)
    J = int(log2(n))
    wc = FWT_PO(y, L, qmf)
    if typename == 'visu':
        wc[(2 ** L): n] = VisuThresh(wc[2 ** L: n])
    elif typename == 'sure':
        wc = MultiVisu(wc, L)
    return IWT_PO(wc, L, qmf), wc
Example #11
def IWT_PO(wc, L, qmf):
    x = wc[:2 ** L]
    n = len(wc)
    J = int(log2(n))
    for j in range(L, J):
        A = UpDyadLo(x, qmf)
        B = wc[(2 ** j): 2 ** (j + 1)]
        B = UpDyadHi(B, qmf)
        x = [sum(it) for it in zip(A, B)]
    return x
Example #12
def FWT_PO(x, L, qmf):
    n = len(x)
    J = int(log2(n))
    wcoef = zeros(n)
    for j in reversed(range(L, J)):
        alfa = DownDyadHi(x, qmf)
        for idx, i in enumerate(dyad(j)):
            wcoef[i - 1] = alfa[idx]
        x = DownDyadLo(x, qmf)
    wcoef[:2 ** L] = x
    return wcoef
Example #13
def calc_cdi(eta):
    '''
  Compute the clone diversity index (CDI), which is the entropy of eta.

  >>> cdi = calc_cdi([[0.5], [0.3], [0.2]])
  >>> np.isclose(cdi[0], 0.9709505944546686)
  True
  '''
    eta = _fix_eta(eta)
    K, S = eta.shape
    H = -ma.sum(eta * ma.log2(eta), axis=0)
    return H
Example #14
def cal_mapvalue_rescaled(pk, mk1_name, mk2_name, ma_fit):
    """
    calculate M&A&P value of known pk with 2 reads data and fit parameters
    ma_fit: R2 = ma_fit[0] * R1 + ma_fit[1]
    """
    density1 = pk.rds_density[mk1_name]
    density2 = pk.rds_density[mk2_name]
    log2_density1_re = (2 - ma_fit[1]) * log2(density1) / (
        2 + ma_fit[1]) - 2 * ma_fit[0] / (2 + ma_fit[1])
    mvalue_re = log2_density1_re - log2(density2)
    avalue_re = (log2_density1_re + log2(density2)) / 2

    density1_norm = 2**log2_density1_re
    density2_norm = 2**log2(density2)
    pvalue = np.ones(pk.pk_num)
    for i in range(pk.pk_num):
        pvalue[i] = __digit_exprs_p_norm(density1_norm[i], density2_norm[i])
    pk.another_info.update({'MAnorm_mvalue': mvalue_re})
    pk.another_info.update({'MAnorm_avalue': avalue_re})
    pk.another_info.update({'MAnorm_pvalue': pvalue})
    return mvalue_re, avalue_re, pvalue
Example #15
def TFIDFPairs(tf_vec, df_vec, num_docs, weighted_type=[0, 0]):
    # tf
    #eps = 0
    eps = np.finfo(np.float32).eps
    tf = np.copy(tf_vec)
    zero_idx = np.where(tf == 0)
    
    if weighted_type[0] == 0:
        tf = 1 * (tf_vec > 0)
    elif weighted_type[0] == 1:
        tf = tf_vec.copy()
    elif weighted_type[0] == 2:
        tf = 1 + tf_vec
    elif weighted_type[0] == 3:
        tf = np.log2(1 + tf_vec)
    elif weighted_type[0] == 4:
        if np.max(tf_vec) == 0:
            tf = 0.5 + 0.5 * tf_vec
        else:
            tf = 0.5 + 0.5 * tf_vec / np.max(tf_vec)
    elif weighted_type[0] == 5:
        tf = 1 + ma.log2(tf_vec).filled(0)
    elif weighted_type[0] == 6:
        tf = 1 + np.log2(1 + tf_vec)
    tf[zero_idx] = eps
    
    # idf
    if weighted_type[1] == 0:
        idf = 1
    elif weighted_type[1] == 1:
        idf = np.log2(num_docs / df_vec)
    elif weighted_type[1] == 2:
        idf = np.log2(1 + num_docs / df_vec)
    elif weighted_type[1] == 3:
        idf = np.log2(1 + (num_docs - df_vec + 0.5) / (df_vec + 0.5))
    elif weighted_type[1] == 4:
        idf = np.log2((num_docs - df_vec + 0.5) / (df_vec + 0.5))
    elif weighted_type[1] == 5:
        idf = np.log2(1 + np.max(df_vec) / df_vec)
    elif weighted_type[1] == 6:
        idf = np.log2(1 + (num_docs - df_vec) / df_vec)
    elif weighted_type[1] == 7:
        idf = np.log2(1 + (np.max(df_vec) - df_vec + 0.5) / (df_vec + 0.5))
    elif weighted_type[1] == 8:
        idf = np.log2((np.max(df_vec) - df_vec + 0.5) / (df_vec + 0.5))
        
    return [tf, idf]
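A usage sketch with hypothetical term and document frequencies, using the log-scaled tf (type 3) and the plain idf (type 1):

import numpy as np

tf_vec = np.array([0.0, 3.0, 1.0])  # term counts in one document
df_vec = np.array([1.0, 2.0, 3.0])  # document frequencies
tf, idf = TFIDFPairs(tf_vec, df_vec, num_docs=10, weighted_type=[3, 1])
print(tf * idf)  # zero-count terms stay near zero (machine epsilon)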
Example #16
    def calculate_score_of_word_given_class(self, word_token, class_value,
                                            vocab, smoothing_factor):
        if constants.DEBUG_VERBOSE:
            print("getting score for {}, class {}".format(
                word_token, class_value))

        p = self.calculate_probability_of_word_given_class(
            word_token, class_value, smoothing_factor)

        if p == SKIP_IT:
            if constants.DEBUG_VERBOSE:
                print("Skipping {} in class {}".format(word_token, class_value))
            return None
        if constants.DEBUG_VERBOSE and p == 0:
            print("Logging zero score for {} in class {}".format(
                word_token, class_value))

        score = log2(p)

        if constants.DEBUG_VERBOSE:
            print("score updated for {}, class {}".format(
                word_token, class_value))
        return score
Example #17
def TFIDF(qry, doc):
    doc_freq = docFreq(doc)
    num_docs = doc.shape[0] + 1
    #qry_new = {q_id : {q_wid : (.5 + .5 * np.log2(q_wc)) * np.log2(num_docs / (1 + doc_freq[q_wid][0]))
    #            for q_wid, q_wc in q_content.items()} for q_id, q_content in qry.items()}
    #doc_new = {d_id : {d_wid : (d_wc) * np.log2(num_docs / (1 + doc_freq[d_wid][0]))
    #            for d_wid, d_wc in d_content.items()} for d_id, d_content in doc.items()}
    qry_tfidf = np.zeros((qry.shape[0], qry.shape[1]))
    doc_tfidf = np.zeros((doc.shape[0], doc.shape[1]))
    for qi, qvec in enumerate(qry):
        zero_idx = np.where(qry[qi] == 0)
        qry_tfidf[qi] = (0.5 + 0.5 * ma.log2(qvec).filled(0)
                         ) * np.log2(1 + num_docs / (1 + doc_freq[:, 0]))
        qry_tfidf[qi][zero_idx] = 0

    for di, dvec in enumerate(doc):
        zero_idx = np.where(doc[di] == 0)
        doc_tfidf[di] = dvec * np.log2(1 + num_docs / (1 + doc_freq[:, 0]))
        doc_tfidf[di][zero_idx] = 0

    return [qry_tfidf, doc_tfidf]
Example #18
 def calc_insulation(dat, dist_range, delta_size):
     nbin = len(dat)
     min_d,max_d = dist_range
     if min_d < 0 or max_d < min_d:
         raise ValueError('calc_insulation() requires 0 <= min_d <= max_d')
     insulation = ma.zeros(nbin)
     for i in range(nbin):
         if i < max_d or i >= nbin-max_d:
             insulation[i] = -1
         else:
             insulation[i] = dat[i,(i-max_d):(i-min_d)].sum() + dat[i,(i+min_d):(i+max_d)].sum()
     k = insulation > 0
     insulation[k] = ma.log2(insulation[k]/insulation[k].mean())
     insulation[~k] = 0
     delta = ma.zeros(nbin)
     for i in range(nbin):
         if i < delta_size:
             delta[i] = insulation[0] - insulation[i+delta_size]
         elif i >= nbin - delta_size:
             delta[i] = insulation[i-delta_size] - insulation[nbin-1] 
         else:
             delta[i] = insulation[i-delta_size]-insulation[i+delta_size]
     return insulation,delta
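A usage sketch on a synthetic contact matrix (hypothetical data): interior bins get a log2 insulation score relative to the mean, boundary bins are zeroed.

import numpy as np
import numpy.ma as ma

rng = np.random.default_rng(2)
dat = rng.random((50, 50))
insulation, delta = calc_insulation(dat, dist_range=(2, 5), delta_size=3)
print(insulation[:10], delta[:10])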
Example #19
def compute_entropy(distribution):
    """
    Given a distribution, computes the Shannon entropy of the distribution in
    bits.

    Input
    -----
    - distribution: a 1D array of probabilities that sum to 1

    Output:
    - entropy: the Shannon entropy of the input distribution in bits
    """

    # -------------------------------------------------------------------------
    # ERROR CHECK -- DO NOT MODIFY
    #
    if np.abs(1 - np.sum(distribution)) > 1e-6:
        exit('In compute_entropy: distribution should sum to 1.')

    inverse_logs = -1 * ma.log2(distribution).filled(0)
    entropy = np.sum(distribution * inverse_logs)

    return entropy
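Two checks worked by hand: a fair coin carries 1 bit, and a deterministic outcome carries 0 bits because the masked log fills the zero terms.

import numpy as np
import numpy.ma as ma

print(compute_entropy(np.array([0.5, 0.5])))       # 1.0
print(compute_entropy(np.array([1.0, 0.0, 0.0])))  # 0.0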
Example #20
def map_f(r):
    # convert input to an array
    # ref: https://stackoverflow.com/questions/29318459/python-function-that-handles-scalar-or-arrays
    r = np.asarray(r)
    scalar_input = False
    if r.ndim == 0:
        r = r[np.newaxis]  # make 1D
        scalar_input = True

    # compute x and y values
    x = np.floor(ma.log2(r)) + 1.0
    y = (r / (2.0**(x - 1.0))) - 1.0

    # compute the sum of x and y, filling zero
    # where there are masked values (should only
    # occur when there are zero entries)
    retval = (x + y).filled(0)

    # return scalar or array
    if scalar_input:
        return np.squeeze(retval)
    else:
        return retval
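A usage sketch: scalars map to scalars, arrays to arrays, and zero entries fall back to 0 via the masked log2.

import numpy as np
import numpy.ma as ma

print(map_f(4))                       # 3.0
print(map_f(np.array([0, 1, 2, 3])))  # [0.  1.  2.  2.5]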
Example #21
 def _compute_sum_entropy(self):
     self._sum_entropy = -np.sum(self._p_xplusy * ma.log2(self._p_xplusy))
Example #22
 def perplexity(self, documents: Iterable[str]) -> float:
     vectors, labels = self.tokenizer.encoded_training_set_from_documents(
         documents)
     predictions = self.model.predict(vectors)
     n = predictions.shape[0]
     return 2**(-ma.log2(predictions * labels).filled(0).sum() / n)
Example #23
def main():
    # Call My Data
    from data_call_test import data_call
    mysignal = data_call("ECG_HE", 1, 0)  # ECG HE 0 ~ 30

    # DWT
    import Wavelet as wavelet
    from numpy.ma import log2

    ## Candidate for L
    L = [x for x in range(1, int(log2(len(mysignal))) + 1)]
    ## Candidate for QMF
    qmflist = {
        'haar': [0],
        'db': [4, 6, 8, 10, 12, 14, 16, 18, 20],
        'coif': [1, 2, 3, 4, 5],
        'symmlet': [4, 5, 6, 7, 8, 9, 10]
    }

    ## Experiment design
    import matplotlib.pyplot as plt
    # 0. Define the characteristic signal line
    # (the spiky parts)
    # 1. Plot the original signal
    # 2. Increase L step by step and watch how the characteristic signal line dies out.
    # 3. Start with haar.
    # 4. How do we measure whether the characteristic signal line survives or dies?

    ### 1. Plot the original signal.
    plt.figure(0)
    plt.plot(mysignal)
    plt.title('Original Signal')

    ### 2. Check where the spikes disappear for each QMF.
    #### 2-1. Define the QMF.
    qmfname = "haar"
    qmfpar = 0
    remove_level = 0
    qmf = wavelet.qmf(qmfname, qmfpar)

    #### 2-2. Drop params for which the DWT is not properly defined.
    if remove_level > 0:
        for idx in range(1, remove_level + 1):
            L.remove(idx)
    print(L)

    # Plot
    it = 0
    for idx in L:
        wc = wavelet.FWT_PO(mysignal, idx, qmf)
        wc_cut = wavelet.cutwavelet(wc, idx)
        ## This cuts off all coefficients beyond 2**idx before approximating.
        ## That is, when idx is defined up to 13, idx = 12 is a first-level decomposition;
        ## if 1 was removed from L, the 12th-level decomposition is simply not considered.
        recons = wavelet.IWT_PO(wc_cut, idx, qmf)
        decomp_lvl = L[len(L) - 1] - idx
        print(" L = idx : ", idx, " decomp: ", decomp_lvl)

        # Plotting
        if it % 3 == 0:
            plt.figure()
        plt.subplot(3, 1, (it % 3) + 1)
        plt.plot(recons)
        plt.title(str(decomp_lvl) + " th recons")
        it += 1
Example #24
 def transform(self, a):
     a = _mask_non_positives(a * 2.0)
     if isinstance(a, MaskedArray):
         return ma.log2(a)
     return np.log2(a)
Example #25
def dyadlength(x):
    # x : signal; returns [n, J] with n = len(x) and J = log2(n)
    n = len(x)
    J = log2(n)
    if J == int(J):
        return [n, int(J)]
    raise ValueError('dyadlength: len(x) is not a power of 2')
Example #26
def get_info(self, values):
    result = []
    for j in range(len(values)):
        for k in range(len(values[j])):
            result.append(-1 * values[j][k] * log2(values[j][k]))
    return result
Example #27
def calc_entropy(A):
    A = ma.masked_equal(A, 0)
    ent = -ma.sum(A * ma.log2(A))
    return np.abs(np.array(ent))
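Worked checks: a uniform distribution over four outcomes has exactly 2 bits of entropy, and the masking drops zero entries entirely.

import numpy as np
import numpy.ma as ma

print(calc_entropy(np.array([0.25, 0.25, 0.25, 0.25])))  # 2.0
print(calc_entropy(np.array([1.0, 0.0])))                # 0.0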
Example #30
def MultiVisu(wc, L):
    n = len(wc)
    wc[(2 ** L):n] = VisuThresh(wc[(2 ** L):n])
    return wc
Example #31
def draw_figs_to_show_data(pks1_uni, pks2_uni, merged_pks, pks1_name,
                           pks2_name, ma_fit, reads1_name, reads2_name):
    """
    draw four figures to show data before and after rescaled
    """
    pks_3set = [pks1_uni, pks2_uni, merged_pks]
    pks1_name = ' '.join([pks1_name, 'unique'])
    pks2_name = ' '.join([pks2_name, 'unique'])
    merged_pks_name = 'merged common peaks'
    pks_names = [pks1_name, pks2_name, merged_pks_name]
    colors = 'bgr'
    a_max = 0
    a_min = 10000
    plt.figure(1).set_size_inches(16, 12)
    for (idx, pks) in enumerate(pks_3set):
        mvalues, avalues = get_peaks_mavalues(pks)
        if len(avalues) != 0:
            a_max = max(max(avalues), a_max)
            a_min = min(min(avalues), a_min)
        plt.scatter(avalues, mvalues, s=10, c=colors[idx])
    plt.xlabel('A value')
    plt.ylabel('M value')
    plt.grid(axis='y')
    plt.legend(pks_names, loc='best')
    plt.title('before rescale')

    # plot the fitting model into figure 1
    x = np.arange(a_min, a_max, 0.01)
    y = ma_fit[1] * x + ma_fit[0]
    plt.plot(x, y, '-', color='k')
    plt.savefig('before_rescale.png')

    # scatter plot of read densities in merged common peaks for the two ChIP-seq sets
    plt.figure(2).set_size_inches(16, 12)
    rd_min = 1000
    rd_max = 0
    rds_density1, rds_density2 = [], []
    for key in merged_pks:
        for pk in merged_pks[key]:
            rds_density1.append(pk.read_density1)
            rds_density2.append(pk.read_density2)
    rd_max = max(max(log2(rds_density1)), rd_max)
    rd_min = min(min(log2(rds_density1)), rd_min)
    plt.scatter(log2(rds_density1),
                log2(rds_density2),
                s=10,
                c='r',
                label=merged_pks_name,
                alpha=0.5)
    plt.xlabel(' log2 read density' + ' by ' + '"' + reads1_name + '" reads')
    plt.ylabel(' log2 read density' + ' by ' + '"' + reads2_name + '" reads')
    plt.grid(axis='y')
    plt.legend(loc='upper left')
    plt.title('Fitting Model via common peaks')
    rx = np.arange(rd_min, rd_max, 0.01)
    ry = (2 - ma_fit[1]) * rx / (2 + ma_fit[1]) - 2 * ma_fit[0] / (2 +
                                                                   ma_fit[1])
    plt.plot(rx, ry, '-', color='k')
    plt.savefig('log2_read_density.png')

    # plot the MA plot after rescale
    plt.figure(3).set_size_inches(16, 12)
    for (idx, pks) in enumerate(pks_3set):
        normed_mvalues, normed_avalues = get_peaks_normed_mavalues(pks)
        plt.scatter(normed_avalues, normed_mvalues, s=10, c=colors[idx])
    plt.xlabel('A value')
    plt.ylabel('M value')
    plt.grid(axis='y')
    plt.legend(pks_names, loc='best')
    plt.title('after rescale')
    plt.savefig('after_rescale.png')

    # generate MA plot for this set of peaks together with p-value
    plt.figure(4).set_size_inches(16, 12)
    for (idx, pks) in enumerate(pks_3set):
        normed_mvalues, normed_avalues = get_peaks_normed_mavalues(pks)
        colors = -log10(get_peaks_pvalues(pks))
        for i, c in enumerate(colors):
            if c > 50:
                colors[i] = 50
        plt.scatter(normed_avalues, normed_mvalues, s=10, c=colors, cmap='jet')
    plt.colorbar()
    plt.grid(axis='y')
    plt.xlabel('A value')
    plt.ylabel('M value')
    plt.title('-log10(P-value)')
    plt.savefig('-log10_P-value.png')
    plt.close()
Example #33
 def _compute_difference_entropy(self):
     tmp = np.sum(self._p_xminusy * ma.log2(self._p_xminusy))
     if not is_mask_constant(tmp, 'diff_entropy'):
         self._diff_entropy = tmp
Example #34
 def _compute_entropy(self):
     tmp = np.sum(self._p * ma.log2(self._p))
     if not is_mask_constant(tmp, 'entropy'):
         self._entropy = tmp
Example #35
# using masked arrays
import numpy.ma as ma

# Entropy calculation
h = np.zeros_like(block, dtype='float64')
for unit_id in unit_ids:
    block_masked = ma.masked_equal(block_probs[unit_id], 0)
    h -= ma.log2(block_masked) * block_masked

import pickle

# save generated objects for further use