def test_setitem(self):
    wl = make_wordlist()
    # assigning an invalid row value or overwriting the 'id' column must fail:
    with self.assertRaises(ValueError):
        wl[1] = []
    with self.assertRaises(ValueError):
        wl[1, 'id'] = 'x'
def test_get_etymdict(self):
    wl = make_wordlist()
    self.assertEqual(
        wl.get_etymdict(ref='concept'),
        {
            'foot': [['6'], ['7'], ['8'], ['9'], ['10']],
            'hand, arm': [['1'], ['2'], ['3'], ['4'], ['5']],
            'knee': [['11'], [], [], [], []],
        })
def test_run_and_dump(self):
    from lingpy3.ops import run_and_dump
    from lingpy3.jsonlib import path_from_checksum

    with patch('lingpy3.ops.CACHE_DIR', self.tmp_path()):
        wl = make_wordlist()
        res, checksum = run_and_dump('distances', wl)
        run_and_dump('distances', wl, __checksum__=checksum)
        self.assertTrue(
            path_from_checksum(checksum, outdir=self.tmp_path()).exists())
def test_operation(self):
    wl = write('csv', make_wordlist(), _outdir=self.tmp_path(), delimiter=',')
    args = '-o {0} operation distances csv:IWordlist:{1}:delimiter=,'.format(
        self.tmp_path(), wl)
    with capture(cli.main, *args.split()) as out:
        self.assertIn('written', out)
        self.assertTrue(bool(list(self.tmp_path().glob('*.json'))))
def test_distances(self):
    from lingpy3.ops.wordlist import distances

    wl = make_wordlist()
    self.assertEqual(
        distances(wl),
        [[0, 1.0, 0.5, 1.0, 0.5],
         [1.0, 0, 1.0, 0.0, 1.0],
         [0.5, 1.0, 0, 1.0, 0.0],
         [1.0, 0.0, 1.0, 0, 1.0],
         [0.5, 1.0, 0.0, 1.0, 0]])
    self.assertEqual(
        distances(wl, mode='jaccard'),
        [[0, 1.0, 0.75, 1.0, 0.75],
         [1.0, 0, 0.8, 0.0, 0.8],
         [0.75, 0.8, 0, 0.8, 0.0],
         [1.0, 0.0, 0.8, 0, 0.8],
         [0.75, 0.8, 0.0, 0.8, 0]])
    distances(wl, mode='shared', refB='x')
    # No shared concepts between languages:
    rows = [
        ['1', 'l1', 'hand', 'hand', 'a', 'abera'],
        ['2', 'l2', 'arm', 'arm', 'b', 'aberb'],
    ]
    distances(make_wordlist(rows=rows))
def test_add_col(self):
    wl = make_wordlist()
    with self.assertRaises(ValueError):
        wl.add_col('id', lambda x: 'x')
    wl.add_col('xcol', lambda x: 'x')
    self.assertEqual(wl['1', 'xcol'], 'x')
    self.assertEqual(wl.header[-1], 'xcol')
    # re-adding an existing column requires override=True:
    with self.assertRaises(ValueError):
        wl.add_col('xcol', lambda x: 'x')
    wl.add_col('xcol', lambda x: 'y', override=True)
    self.assertEqual(wl['1', 'xcol'], 'y')
    wl.add_col('zcol', lambda x: x['xcol'])
    self.assertEqual(wl['1', 'zcol'], 'y')
    wl['1', 'xcol'] = 'z'
    self.assertEqual(wl['1', 'xcol'], 'z')
def test_misc(self):
    from lingpy3.io import write, read, list_writers_doc, get, list_readers_doc

    self.assertTrue(bool(list_readers_doc(IWordlist, self.tmp_path())))
    wl = make_wordlist()
    out = write('csv', wl, _outdir=self.tmp_path(), _stem='test', delimiter='\t')
    self.assertTrue(out.exists())
    wl2 = read('csv', Wordlist, out, delimiter='\t')
    self.assertEqual(wl[1], wl2[1])
    with out.open(encoding='utf8') as fp:
        wl3 = read('csv', IWordlist, fp.read(), delimiter='\t')
        self.assertEqual(wl[1], wl3[1])
    for name, _, mod in list_writers_doc(wl):
        get(name, wl)
    scm = registry.get(ISoundClassModel, 'asjp')
    self.assertIn('Brown', get('txt', scm))
def test_iter_paps(self):
    wl = make_wordlist()
    self.assertEqual(
        list(wl.iter_paps(missing=-1)),
        [('a', [1, 1, 1, 1, 1]),
         ('b', [0, 1, 0, 1, 0]),
         ('fa', [0, 0, 1, 0, 1]),
         ('fb', [0, 1, 0, 1, 0]),
         ('gc', [1, -1, -1, -1, -1])])
def test_get_by_concept(self):
    wl = make_wordlist()
    self.assertEqual(len(wl.get_by_concept()), 3)
def test_get_by_language(self):
    wl = make_wordlist()
    self.assertEqual(len(wl.get_by_language()), 5)
    self.assertEqual(len(wl.get_by_language(language='l1')), 1)
def test_filter(self):
    wl = make_wordlist()
    self.assertEqual(len(list(wl.filter(language='l1', concept='knee'))), 1)
def test_get_slices(self):
    wl = make_wordlist()
    # an empty selection yields all rows:
    self.assertEqual(wl.get_slices([]), list(wl))
    self.assertEqual(wl.get_slices(1)[0], wl[1, 'id'])
def test_init(self):
    wl = make_wordlist()
    self.assertEqual(wl.concepts, ['foot', 'hand, arm', 'knee'])
    # invalid data type for header:
    with self.assertRaises(ValueError):
        make_wordlist(header=['id', 'concept', 'doculect', 5, 6, 7])
    # duplicate column names in header:
    with self.assertRaises(ValueError):
        make_wordlist(header=['id', 'concept', 'doculect', 'id'])
    # missing columns in header:
    with self.assertRaises(ValueError):
        make_wordlist(header=[])
    # row length not matching header length:
    with self.assertRaises(ValueError):
        make_wordlist(rows=[[1, 2]])
    # duplicate row ID:
    with self.assertRaises(ValueError):
        make_wordlist(rows=[[1, 2, 3, 4, 5, 6], [1, 2, 3, 4, 5, 6]])
def test_dump_load_custom(self):
    from lingpy3.jsonlib import load, dump

    wl = make_wordlist()
    out = dump(wl, outdir=self.tmp_path())
    self.assertEqual(wl, load(out, Wordlist))
def test_misc(self):
    from lingpy3.ops import run, list_ops_doc

    wl = make_wordlist()
    self.assertIn('distances', set(spec[0] for spec in list_ops_doc(wl)))
    self.assertIsInstance(run('distances', wl), list)