Example #1
    def test_fisher_alpha(self):
        exp = 2.7823795367398798
        arr = np.array([4, 3, 4, 0, 1, 0, 2])
        obs = fisher_alpha(arr)
        self.assertAlmostEqual(obs, exp)

        # Should depend only on S and N (number of OTUs, number of
        # individuals / seqs), so we should obtain the same output as above.
        obs = fisher_alpha([1, 6, 1, 0, 1, 0, 5])
        self.assertAlmostEqual(obs, exp)

        # Should match another by hand:
        # 2 OTUs, 62 seqs, alpha is 0.39509
        obs = fisher_alpha([61, 0, 0, 1])
        self.assertAlmostEqual(obs, 0.39509, delta=0.0001)

        # Test case where we have >1000 individuals (SDR-IV makes note of this
        # case). Verified against R's vegan::fisher.alpha.
        obs = fisher_alpha([999, 0, 10])
        self.assertAlmostEqual(obs, 0.2396492)
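For reference, Fisher's alpha is the parameter of Fisher's log-series model, obtained by solving S = alpha * ln(1 + N / alpha), where S is the number of observed OTUs and N is the total number of individuals. That is why the test above expects identical output for any two vectors with the same S and N. Below is a minimal root-finding sketch of that relationship; the name fisher_alpha_sketch, the bisection approach, and the bracket bounds are illustrative assumptions, not scikit-bio's actual implementation.

import math

def fisher_alpha_sketch(counts, lo=1e-9, hi=1e9):
    # Solve S = alpha * ln(1 + N / alpha) for alpha by bisection.
    # The left-hand side is strictly increasing in alpha and rises from 0
    # toward N, so a single root is bracketed whenever 0 < S < N.
    s = sum(1 for c in counts if c > 0)  # S: number of observed OTUs
    n = sum(counts)                      # N: total number of individuals

    def f(a):
        return a * math.log(1 + n / a) - s

    for _ in range(200):                 # 200 halvings: well past float precision
        mid = (lo + hi) / 2
        if f(mid) < 0:
            lo = mid
        else:
            hi = mid
    return (lo + hi) / 2

fisher_alpha_sketch([4, 3, 4, 0, 1, 0, 2])  # ~2.78238, matching exp above
fisher_alpha_sketch([61, 0, 0, 1])          # ~0.39509, the "by hand" case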
Example #2
# Imports assumed by this snippet: the diversity metrics come from
# skbio.diversity.alpha; identify_speech is defined elsewhere in the project.
import nltk
from skbio.diversity.alpha import (berger_parker_d, brillouin_d, fisher_alpha,
                                   simpson_e)

def sentence_start(text):

    # Count variables: one slot per POS category, plus the diversity metrics.
    # (The original listed "pronouns" twice; the duplicate key is dropped here.)
    ratio_dict = {
        "nouns": 0,
        "pronouns": 0,
        "verbs": 0,
        "adjectives": 0,
        "adverbs": 0,
        "conjunctions": 0,
        "particles": 0,
        "prepositions": 0,
        "others": 0,
        "simpson": 0,
        "fisher": 0,
        "brillouin": 0,
        "berger_parker": 0
    }

    # Tokenize into sentences
    sentences = nltk.tokenize.sent_tokenize(text)
    problem_sentences = []

    # Loop through sentences, tallying the POS category each sentence starts with
    for sentence in sentences:
        tags = identify_speech(sentence)
        ratio_dict[tags[0]] += 1

        if tags[0] in ("nouns", "pronouns"):
            problem_sentences.append(sentence)

    # Calculate diversity over the first seven POS-category counts
    # (nouns through particles)
    pos_counts = list(ratio_dict.values())[0:7]
    simpson = simpson_e(pos_counts)
    fisher = fisher_alpha(pos_counts)
    brillouin = brillouin_d(pos_counts)
    berger_parker = berger_parker_d(pos_counts)

    # Convert to percentage (currently disabled)
    # ratio_dict = {k: "".join([str(round(v / len(sentences), 4) * 100), "%"])
    #               for k, v in ratio_dict.items()}

    # Update diversity metrics
    ratio_dict['simpson'] = simpson
    ratio_dict['fisher'] = fisher
    ratio_dict['brillouin'] = brillouin
    ratio_dict['berger_parker'] = berger_parker

    return (ratio_dict, problem_sentences)
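A hypothetical invocation, assuming the NLTK punkt sentence-tokenizer data has been downloaded and that identify_speech (not shown above) returns a sequence whose first element is one of the POS-category keys in ratio_dict:

import nltk
nltk.download("punkt")  # one-time download of the sentence tokenizer model

text = "She laughed. The meeting ran long. Quietly, the crowd dispersed."
ratios, flagged = sentence_start(text)
print(ratios["fisher"])  # Fisher's alpha over the sentence-start POS counts
print(flagged)           # sentences that start with a noun or pronoun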
Example #3
# Import assumed by this snippet: the metrics used below all live in
# skbio.diversity.alpha.
from skbio.diversity import alpha as skbio_alpha


def mercat_compute_alpha_beta_diversity(counts, bif):

    abm = dict()

    abm['shannon'] = skbio_alpha.shannon(counts)
    abm['simpson'] = skbio_alpha.simpson(counts)
    abm['simpson_e'] = skbio_alpha.simpson_e(counts)
    abm['goods_coverage'] = skbio_alpha.goods_coverage(counts)
    abm['fisher_alpha'] = skbio_alpha.fisher_alpha(counts)
    abm['dominance'] = skbio_alpha.dominance(counts)
    abm['chao1'] = skbio_alpha.chao1(counts)
    abm['chao1_ci'] = skbio_alpha.chao1_ci(counts)
    abm['ace'] = skbio_alpha.ace(counts)

    # Write one "metric = value" line per metric; bif is the output-file prefix
    with open(bif + "_diversity_metrics.txt", 'w') as dmptr:
        for abmetric, value in abm.items():
            dmptr.write(abmetric + " = " + str(value) + "\n")
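A short usage sketch; the counts vector and the "sampleA" prefix are made up for illustration:

counts = [120, 30, 0, 4, 1, 86]  # per-OTU observation counts for one sample
mercat_compute_alpha_beta_diversity(counts, "sampleA")
# writes sampleA_diversity_metrics.txt with one "metric = value" line per metric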