def evaluate_distribution(event_name, words, session, ids):
    """
    Run a complete evaluation of the word distribution for an event: compute the
    Jensen-Shannon divergence and the Jaccard index against every reference timeline.
    :param event_name: name of the event to be evaluated
    :param words: number of most-common words to keep when building the word sets
    :param session: session passed through to calculate_distribution_event
    :param ids: document ids passed through to calculate_distribution_event
    :return: None; results are printed
    """
    print(event_name, words)
    words_event, distribution_event, pairs_event = calculate_distribution_event(event_name, session, ids, True)
    path_references = Path(LOCAL_DATA_DIR_2, 'data', event_name, 'summaries', 'reference')
    references_list = [reference for reference in path_references.iterdir() if reference.is_file()]
    event_dist = dit.ScalarDistribution(words_event, distribution_event)
    words_set_event = set(words_event[:words])
    print('Most Common words in event: {}'.format(words_set_event))
    total_dist, all_words = global_distribution(references_list)
    all_words_set = set(all_words[:words])
    jaccard = len(words_set_event.intersection(all_words_set)) / len(words_set_event.union(all_words_set))
    print('Most Common words in all timelines: {}'.format(all_words_set))
    print('Jaccard Index with all timelines: {}'.format(jaccard))
    print('Jensen-Shannon with all timelines: {}'.format(jensen_shannon_divergence([total_dist, event_dist])))
    for reference in references_list:
        words_timeline, probs_timeline, pairs_timeline = calculate_distribution_timeline(event_name, reference)
        dist_timeline = dit.ScalarDistribution(words_timeline, probs_timeline)
        print('----------------------------')
        word_set_timeline = set(words_timeline[:words])
        print(reference.name)
        print('Most Common words in timeline: {}'.format(word_set_timeline))
        print('Jensen-Shannon: {}'.format(jensen_shannon_divergence([dist_timeline, event_dist])))
        jaccard = len(words_set_event.intersection(word_set_timeline)) / len(words_set_event.union(word_set_timeline))

        print('Jaccard Index: {}'.format(jaccard))
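For orientation, the two metrics printed above can be reproduced on toy data. This is a minimal sketch using only dit; the word sets and probabilities are made up:

import dit
from dit.divergences import jensen_shannon_divergence

# Jaccard index: overlap between two "most common words" sets
set_a = {'storm', 'flood', 'fire'}
set_b = {'storm', 'flood', 'quake'}
jaccard = len(set_a & set_b) / len(set_a | set_b)  # 2 / 4 = 0.5

# Jensen-Shannon divergence: distance between two word distributions
p = dit.ScalarDistribution(['storm', 'flood'], [0.5, 0.5])
q = dit.ScalarDistribution(['storm', 'flood'], [0.9, 0.1])
print(jaccard, jensen_shannon_divergence([p, q]))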
Example #2
def fitness(individual, data):
    # Decode the candidate vector into a DNA string, then score it by the
    # Jensen-Shannon divergence between its k-mer vector and the target's.
    individual = vector_to_dna(individual)
    # fitness = np.linalg.norm(target - vector(individual))  # Euclidean alternative
    fitness = jensen_shannon_divergence([
        dit.ScalarDistribution(target),
        dit.ScalarDistribution(vector(individual))
    ])
    return fitness
Example #3
def fitness(individual, data):
    # Decode the candidate into a DNA string and score it under the selected
    # mode: "JSD" compares normalized k-mer vectors, "ED" uses Euclidean distance.
    individual = vector_to_dna(individual)
    if mode == "JSD":
        return jensen_shannon_divergence([
            dit.ScalarDistribution(target / len(k)),
            dit.ScalarDistribution(vector(individual) / len(k))
        ])
    elif mode == "ED":
        return np.linalg.norm(target - vector(individual))
    else:
        raise ValueError("Fitness mode must be JSD or ED")
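Both fitness variants feed dit.ScalarDistribution, which expects a probability mass function, hence the division before constructing the distributions. A minimal sketch of that normalization step (the counts are made up):

import numpy as np
import dit

counts = np.array([3.0, 1.0, 4.0, 2.0])   # hypothetical k-mer counts
pmf = counts / counts.sum()               # normalize so the pmf sums to 1
d = dit.ScalarDistribution(pmf)           # outcomes default to the indices 0..n-1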
Example #4
def calculate_jsd(input_values, input_probabilities):
    """
    Calculates the Jensen-Shannon divergence between the input column and
    every row of the (rescaled) probability table.
    """
    processed_probabilities = scale_table(input_probabilities)
    x = dit.ScalarDistribution(amino_list, input_values, sample_space=amino_list, sort=True)
    jsd_values = []
    for row in processed_probabilities:
        y = dit.ScalarDistribution(amino_list, row, sample_space=amino_list, sort=True)
        jsd_values.append(jensen_shannon_divergence([x, y]))
    return jsd_values
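calculate_jsd relies on a module-level amino_list and a scale_table helper from its own project; the dit pattern it uses can be exercised standalone like this (a toy three-letter alphabet stands in for the real one):

import dit
from dit.divergences import jensen_shannon_divergence

amino_list = ['A', 'R', 'N']  # toy alphabet, not the project's real list
x = dit.ScalarDistribution(amino_list, [0.2, 0.3, 0.5], sample_space=amino_list, sort=True)
y = dit.ScalarDistribution(amino_list, [0.4, 0.4, 0.2], sample_space=amino_list, sort=True)
print(jensen_shannon_divergence([x, y]))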
Example #5
h_l = ax.hist(seg_t2_l.flatten(),
              100,
              range=(0, 100),
              histtype='step',
              alpha=1,
              lw=2,
              label='LEFT')
h_r = ax.hist(seg_t2_r.flatten(),
              100,
              range=(0, 100),
              histtype='step',
              alpha=1,
              lw=2,
              label='RIGHT')
ax.set_xlabel('T2 (ms)')
ax.set_ylabel('# COUNTS')
ax.legend()
plt.draw()
plt.savefig(join(joint_save_folder, 'T2_hist.png'), format='png', dpi=300)

# Compute Jensen-Shannon divergence

pmf_l = np.divide(h_l[0], np.sum(h_l[0]))
pmf_r = np.divide(h_r[0], np.sum(h_r[0]))

d_l = Distribution.from_ndarray(pmf_l)
d_r = Distribution.from_ndarray(pmf_r)

JSD = jensen_shannon_divergence([d_l, d_r])

pd_header = ['#counts', 'JS_Divergence']
pd_list = [[np.sum(h_l[0]), JSD], [np.sum(h_r[0]), JSD]]

df = pd.DataFrame(pd_list, columns=pd_header)

df.to_csv(join(joint_save_folder, 'histogram_calculation.csv'))
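The same histogram-to-JSD pipeline can be shown end to end without the imaging data. A minimal sketch with hand-made bin counts, using the ScalarDistribution constructor seen elsewhere on this page in place of the snippet's Distribution.from_ndarray helper:

import numpy as np
import dit
from dit.divergences import jensen_shannon_divergence

counts_l = np.array([10.0, 30.0, 40.0, 20.0])   # made-up bin counts, LEFT
counts_r = np.array([25.0, 25.0, 25.0, 25.0])   # made-up bin counts, RIGHT
pmf_l = counts_l / counts_l.sum()               # normalize counts to pmfs
pmf_r = counts_r / counts_r.sum()
d_l = dit.ScalarDistribution(pmf_l)
d_r = dit.ScalarDistribution(pmf_r)
print(jensen_shannon_divergence([d_l, d_r]))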
Example #6
def calc_stats(sequence, verbose):
    """
    INPUT:
        * sequence - sequence sampled from the seq_gen class
        * verbose - if True, print a summary of the empirical statistics
    OUTPUT:
        * stats_out - dict of summary statistics, including js_temp, the
          Jensen-Shannon divergence between the empirical distributions of
          standards-between-deviants in the two regimes
        * reg_0_dev, reg_1_dev - deviant records for regime 0 and regime 1
    """

    # Exclude catch trials (entries with stimulus value 0.5)
    sequence_sub = sequence[sequence[:, 2] != 0.5, :]
    deviants, regime_switches = find_deviants(sequence)

    # Catch trial/regime switch prob
    catch_prob = len(sequence[sequence[:, 2] == 0.5, 0]) / sequence.shape[0]
    switch_prob = regime_switches / sequence.shape[0]

    stim_prob_overall = len(sequence[sequence[:, 2] == 1, 2]) / (
        len(sequence[sequence[:, 2] == 1, 2]) +
        len(sequence[sequence[:, 2] == 0, 2]))

    # 0th Order Stimulus probability (empirical)
    stim_prob_reg0 = np.mean(sequence[sequence[:, 1] == 0, 2])
    stim_prob_reg1 = np.mean(sequence[sequence[:, 1] == 1, 2])

    # 1st Order Stimulus prob (empirical)
    alt_prob_reg0 = np.mean(deviants[deviants[:, 0] == 0, 1])
    alt_prob_reg1 = np.mean(deviants[deviants[:, 0] == 1, 1])

    # Empirical pmf of standards between deviants for both regimes
    reg_0_dev = deviants[deviants[:, 0] == 0, :]
    reg_1_dev = deviants[deviants[:, 0] == 1, :]

    # Average train-length per regime:
    avg_train_reg0 = (np.sum(deviants[deviants[:, 0] == 0, 3]) /
                      np.count_nonzero(deviants[deviants[:, 0] == 0, 3]))
    avg_train_reg1 = (np.sum(deviants[deviants[:, 0] == 1, 3]) /
                      np.count_nonzero(deviants[deviants[:, 0] == 1, 3]))

    # Time spent in Regimes
    trials_in_reg0 = deviants[deviants[:, 0] == 0, 0]
    time_reg0 = trials_in_reg0.shape[0] / deviants.shape[0]

    try:
        epmf_reg_0_dev = np.histogram(reg_0_dev[:, 3],
                                      bins=int(np.max(reg_0_dev[:, 3])),
                                      density=True)

        epmf_reg_1_dev = np.histogram(reg_1_dev[:, 3],
                                      bins=int(np.max(reg_1_dev[:, 3])),
                                      density=True)

        # Calculate the symmetric Jensen-Shannon divergence
        d1 = dit.ScalarDistribution(epmf_reg_0_dev[1][:-1], epmf_reg_0_dev[0])
        d2 = dit.ScalarDistribution(epmf_reg_1_dev[1][:-1], epmf_reg_1_dev[0])
        js_temp = jensen_shannon_divergence([d1, d2])
    except Exception:
        # e.g. a regime with no deviants leaves an empty histogram
        js_temp = None

    if verbose:
        print(
            "Empirical Probabilities: \n"
            " Empirical Catch Prob.: {} \n"
            " Empirical Regime Switch Prob.: {} \n"
            " Empirical Overall High-Intensity Stimulus Prob.: {} \n"
            " Empirical Regime 0 High-Intensity Stimulus Prob.: {} \n"
            " Empirical Regime 1 High-Intensity Stimulus Prob.: {} \n"
            " Empirical Regime 0 Alternation Prob.: {} \n"
            " Empirical Regime 1 Alternation Prob.: {} \n"
            " JS Div. Deviant Waiting Time Distr. between Regimes: {} \n"
            " Time in Regime 0: {} \n"
            " Average Train Length in Regime 0: {} \n"
            " Average Train Length in Regime 1: {}".format(
                catch_prob, switch_prob, stim_prob_overall, stim_prob_reg0,
                stim_prob_reg1, alt_prob_reg0, alt_prob_reg1, js_temp,
                time_reg0, avg_train_reg0, avg_train_reg1))
        print("--------------------------------------------")

    stats_out = {
        "emp_catch_prob": catch_prob,
        "emp_overall_sp": stim_prob_overall,
        "emp_reg0_sp": stim_prob_reg0,
        "emp_reg1_sp": stim_prob_reg1,
        "emp_reg0_ap": alt_prob_reg0,
        "emp_reg1_ap": alt_prob_reg1,
        "js_div": js_temp,
        "avg_train_r0": avg_train_reg0,
        "avg_train_r1": avg_train_reg1
    }
    return stats_out, reg_0_dev, reg_1_dev
Example #7
eds, jsds = [], []
for _ in range(num_trials):  # num_trials: hypothetical trial count for this sketch
    rand_seq_1 = create_random_seq(sl)
    rand_seq_2 = create_random_seq(sl)

    #kmer_freqs_1 = k_mer_frequencies(rand_seq_1, k, include_missing = True)
    #kmer_freqs_2 = k_mer_frequencies(rand_seq_2, k, include_missing = True)

    #print(kmer_freqs_1)
    #print(kmer_freqs_2)

    vector_1 = vector(rand_seq_1, [k])
    vector_2 = vector(rand_seq_2, [k])

    eds.append(np.linalg.norm(vector_1 - vector_2))
    jsds.append(
        jensen_shannon_divergence([
            dit.ScalarDistribution(vector_1),
            dit.ScalarDistribution(vector_2)
        ]))

plt.figure()
plt.scatter(eds,
            jsds,
            edgecolor='black',
            linewidth=1,
            alpha=0.5,
            facecolor='green')
plt.xlabel('Euclidean Distance')
plt.ylabel('Jensen-Shannon Divergence')
plt.title('JSD vs. ED, k = ' + str(k) + ', seq_len = ' + str(sl))
plt.show()
Example #8
def js_divergence(logits, ae_logits):
    # Despite the names, both arguments must already be probability vectors
    # over the binary outcomes [0, 1] (e.g. softmax outputs), not raw logits.
    a = dit.ScalarDistribution([0, 1], logits)
    b = dit.ScalarDistribution([0, 1], ae_logits)
    return jensen_shannon_divergence([a, b])
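A quick sanity check of js_divergence on hand-made probability pairs (identical inputs give 0; dit reports the result in bits):

print(js_divergence([0.5, 0.5], [0.5, 0.5]))   # 0.0 for identical distributions
print(js_divergence([0.9, 0.1], [0.1, 0.9]))   # grows as the inputs diverge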
# For this hand-computed version, natural log (ln) is used.
JS_by_hand = 0.5 * (prob1[0][0][0][0] * torch.log(prob1[0][0][0][0] / ensemble_probs[0][0][0][0])
                    + prob1[0][1][0][0] * torch.log(prob1[0][1][0][0] / ensemble_probs[0][1][0][0])
                    + prob3[0][0][0][0] * torch.log(prob3[0][0][0][0] / ensemble_probs[0][0][0][0])
                    + prob3[0][1][0][0] * torch.log(prob3[0][1][0][0] / ensemble_probs[0][1][0][0]))

print('implemented JS by pytorch: ', JS_Div_loss, ' , implemented by hands:',
      JS_by_hand)

# For this version, dit uses log2 (bits).
import dit, numpy as np
from dit.divergences import jensen_shannon_divergence
X = dit.ScalarDistribution(['0', '1'], prob1.numpy().ravel())
Y = dit.ScalarDistribution(['0', '1'], prob3.numpy().ravel())
print('JS-div in log2:', jensen_shannon_divergence([X, Y]), ' , in ln: ',
      jensen_shannon_divergence([X, Y]) * np.log(2))
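dit reports the Jensen-Shannon divergence in bits (base-2 logarithm); multiplying by ln 2 converts bits to nats, which makes the value directly comparable to the torch computation above that used natural logs.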

## image examples:

prob1 = F.softmax(torch.rand(1, 2, 256, 256), 1)
# prob2 =  F.softmax(torch.rand(1,2,256,256),1)
prob2 = copy.deepcopy(prob1)
prob3 = F.softmax(torch.rand(1, 2, 256, 256), 1)
# prob3 = copy.deepcopy(prob1)

ensemble_probs = torch.cat([prob1, prob2, prob3], 0)
distribution_number = ensemble_probs.shape[0]

Mixture_dist = ensemble_probs.mean(0, keepdim=True).expand(
    distribution_number, ensemble_probs.shape[1], ensemble_probs.shape[2],
    ensemble_probs.shape[3])
Example #9
def compute_jensen_shannon(event, reference_name, summary_name):
    reference_dist, summary_dist = create_distribution(event, reference_name, summary_name)
    return jensen_shannon_divergence([summary_dist, reference_dist])
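compute_jensen_shannon depends on the create_distribution helper from its own project. Note that the Jensen-Shannon divergence is symmetric, so the order of [summary_dist, reference_dist] does not affect the result. A hypothetical call, with all argument values as placeholders:

# jsd = compute_jensen_shannon('some_event', 'reference_01', 'summary_01')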