""" from __future__ import ( absolute_import, division, print_function, unicode_literals, ) import unittest from math import isnan, sqrt from abydos.stats import ConfusionTable UNIT_TABLE = ConfusionTable(1, 1, 1, 1) NULL_TABLE = ConfusionTable(0, 0, 0, 0) SCALE_TABLE = ConfusionTable(1, 2, 3, 4) # https://en.wikipedia.org/wiki/Confusion_matrix#Table_of_confusion CATSNDOGS_TABLE = ConfusionTable(5, 17, 2, 3) # https://en.wikipedia.org/wiki/Sensitivity_and_specificity#Worked_example WORKED_EG_TABLE = ConfusionTable(20, 1820, 180, 10) VERY_POOR_TABLE = ConfusionTable(0, 0, 200, 200) ALL_TABLES = ( UNIT_TABLE, NULL_TABLE, SCALE_TABLE, CATSNDOGS_TABLE, WORKED_EG_TABLE, VERY_POOR_TABLE,
def test_constructors(self):
    """Check ConfusionTable construction and equality for each input form.

    Covers positional ints, a packed tuple/list/dict as the first
    argument, comparison against plain tuples/lists/dicts, and the
    error expected for a tuple of the wrong length.
    """
    # Leaving off trailing cells is expected to match passing zeros.
    for zero_cells in ((), (0,), (0, 0), (0, 0, 0), (0, 0, 0, 0)):
        self.assertEqual(ConfusionTable(), ConfusionTable(*zero_cells))

    # A single non-zero cell, in any position, must break equality.
    for one_hot in ((1,), (0, 1), (0, 0, 1), (0, 0, 0, 1)):
        self.assertNotEqual(ConfusionTable(), ConfusionTable(*one_hot))

    # int constructor & __eq__ by value
    self.assertEqual(SCALE_TABLE, ConfusionTable(1, 2, 3, 4))

    # tuple, list, and dict constructors; when the first argument is a
    # packed container the test expects the extra positionals (5, 6, 7)
    # to have no effect on the resulting table.
    packed_forms = (
        (1, 2, 3, 4),
        [1, 2, 3, 4],
        {'tp': 1, 'tn': 2, 'fp': 3, 'fn': 4},
    )
    for packed in packed_forms:
        self.assertEqual(SCALE_TABLE, ConfusionTable(packed))
        self.assertEqual(SCALE_TABLE, ConfusionTable(packed, 5, 6, 7))

    # dicts that are empty or carry only unrecognised keys are expected
    # to produce the all-zero table
    self.assertEqual(NULL_TABLE, ConfusionTable({}))
    unknown_keys = {'pt': 1, 'nt': 2, 'pf': 3, 'nf': 4}
    self.assertEqual(NULL_TABLE, ConfusionTable(unknown_keys))

    # __eq__ by id(), then against a tuple, a list, and a dict; the
    # explicit ``==`` keeps __eq__ (not __ne__) under test
    comparands = (
        SCALE_TABLE,
        (1, 2, 3, 4),
        [1, 2, 3, 4],
        {'tp': 1, 'tn': 2, 'fp': 3, 'fn': 4},
    )
    for other in comparands:
        self.assertTrue(SCALE_TABLE == other)
        self.assertFalse(CATSNDOGS_TABLE == other)

    # __eq__ with non-ConfusionTable/tuple/list/dict
    self.assertFalse(SCALE_TABLE == 5)

    # invalid tuple constructor (wrong number of cells)
    with self.assertRaises(AttributeError):
        ConfusionTable((1, 2))
def test_token_distance(self):
    """Exercise abydos.distance._TokenDistance members.

    Concrete measures (Jaccard, SokalMichener, AverageLinkage) are used to
    drive the constructor options, the normalizers, and the private
    set-operation helpers.
    """
    dna_src, dna_tar = 'ATCAACGAGT', 'AACGATTAG'
    word_src, word_tar = 'synonym', 'antonym'

    # Alphabet / tokenizer / intersection-type options, via Jaccard
    soft_jaccard = Jaccard(intersection_type='soft', alphabet=24)
    self.assertAlmostEqual(soft_jaccard.sim(dna_src, dna_tar), 0.68)

    unigram_jaccard = Jaccard(qval=1, alphabet='CGAT')
    self.assertAlmostEqual(unigram_jaccard.sim(dna_src, dna_tar), 0.9)

    skipgram_dna = Jaccard(tokenizer=QSkipgrams(qval=3), alphabet='CGAT')
    self.assertAlmostEqual(
        skipgram_dna.sim(dna_src, dna_tar), 0.6372795969773299
    )

    no_alphabet = Jaccard(alphabet=None)
    self.assertAlmostEqual(
        no_alphabet.sim(word_src, word_tar), 0.3333333333333333
    )

    skipgram_words = Jaccard(tokenizer=QSkipgrams(qval=3))
    self.assertAlmostEqual(
        skipgram_words.sim(word_src, word_tar), 0.34146341463414637
    )

    # Counters passed in place of strings
    left_counts = Counter({'a': 5, 'b': 2, 'c': 10})
    right_counts = Counter({'a': 2, 'c': 1, 'd': 3, 'e': 12})
    self.assertAlmostEqual(Jaccard().sim(left_counts, right_counts), 0.09375)

    # Each normalizer setting, with the default constructor as last case
    normalizer_cases = (
        ({'normalizer': 'proportional'}, 0.984777917351113),
        ({'normalizer': 'log'}, 1.2385752469545532),
        ({'normalizer': 'exp', 'alphabet': 0}, 3.221246147982545e18),
        ({'normalizer': 'laplace'}, 0.98856416772554),
        ({'normalizer': 'inverse'}, 197.95790155440417),
        ({'normalizer': 'complement'}, 1.0204081632653061),
        ({'normalizer': 'base case'}, 0.9897959183673469),
        ({}, 0.9897959183673469),
    )
    for options, expected_sim in normalizer_cases:
        measure = SokalMichener(**options)
        self.assertAlmostEqual(
            measure.sim(word_src, word_tar), expected_sim
        )

    # Private helpers, inspected after tokenizing one word pair.
    # The expected bigrams fall into three groups, each seen once.
    src_only = ('$s', 'sy', 'yn', 'no')
    tar_only = ('$a', 'an', 'nt', 'to')
    shared = ('on', 'ny', 'ym', 'm#')

    sm = SokalMichener()
    sm._tokenize(word_src, word_tar)  # noqa: SF01

    tokens = sm._get_tokens()  # noqa: SF01
    self.assertEqual(
        tokens, (Counter(src_only + shared), Counter(tar_only + shared))
    )
    self.assertEqual(sm._src_card(), 8)  # noqa: SF01
    self.assertEqual(sm._tar_card(), 8)  # noqa: SF01

    sym_diff = sm._symmetric_difference()  # noqa: SF01
    self.assertEqual(sym_diff, Counter(src_only + tar_only))
    self.assertEqual(sm._symmetric_difference_card(), 8)  # noqa: SF01
    self.assertEqual(sm._total_complement_card(), 772)  # noqa: SF01
    self.assertEqual(sm._population_card(), 788)  # noqa: SF01

    union = sm._union()  # noqa: SF01
    self.assertEqual(union, Counter(src_only + shared + tar_only))
    self.assertEqual(sm._union_card(), 12)  # noqa: SF01

    # The expected difference keeps zero and negative entries, so it is
    # built from an explicit mapping rather than from a token sequence.
    signed_counts = dict.fromkeys(src_only, 1)
    signed_counts.update(dict.fromkeys(shared, 0))
    signed_counts.update(dict.fromkeys(tar_only, -1))
    difference = sm._difference()  # noqa: SF01
    self.assertEqual(difference, Counter(signed_counts))

    intersection = sm._intersection()  # noqa: SF01
    self.assertEqual(intersection, Counter(shared))

    table = sm._get_confusion_table()  # noqa: SF01
    self.assertEqual(table, ConfusionTable(tp=4, tn=772, fp=4, fn=4))

    # Alphabet supplied as a Counter
    nucleotide_counts = Counter(dict.fromkeys('CGAT', 20))
    sm = SokalMichener(alphabet=nucleotide_counts, qval=1)
    sm._tokenize(dna_src, dna_tar)  # noqa: SF01
    self.assertEqual(sm._total_complement_card(), 61)  # noqa: SF01

    # Linkage intersection with the internal assignment-problem solver
    jac = Jaccard(
        intersection_type='linkage', internal_assignment_problem=True
    )
    self.assertAlmostEqual(jac.sim('abandonned', 'abandoned'), 1.0)
    self.assertAlmostEqual(
        jac.sim('abundacies', 'abundances'), 0.6296296296296297
    )

    # Some additional constructors needed to complete test coverage
    short, longer = 'abc', 'abcd'
    letters = 'abcdefghijklmnop'

    multi_q = Jaccard(alphabet=None, qval=range(2, 4))
    self.assertAlmostEqual(multi_q.sim(short, longer), 0.42857142857142855)

    multi_q_linkage = AverageLinkage(qval=range(2, 4))
    self.assertAlmostEqual(
        multi_q_linkage.sim(short, longer), 0.22558922558922556
    )

    multi_q_letters = Jaccard(alphabet=letters, qval=range(2, 4))
    self.assertAlmostEqual(
        multi_q_letters.sim(short, longer), 0.42857142857142855
    )

    whitespace_letters = Jaccard(
        alphabet=letters, tokenizer=WhitespaceTokenizer()
    )
    self.assertAlmostEqual(whitespace_letters.sim(short, longer), 0.0)

    list_alphabet = Jaccard(alphabet=list(letters))
    self.assertAlmostEqual(list_alphabet.sim(short, longer), 0.5)

    per_character = Jaccard(tokenizer=CharacterTokenizer())
    self.assertAlmostEqual(per_character.sim(short, longer), 0.75)