from lexibank_deepadungpalaung import Dataset
# NOTE(review): the wildcard import and `bcubes` are not used in this part of
# the script; they are kept because code elsewhere in the file may rely on
# the names they provide.
from lingpy import *
from lingpy.compare.partial import Partial
from lingpy.evaluate.acd import bcubes

# Columns requested from the CLDF dataset.
columns = (
    'concept_name',
    'language_id',
    'value',
    'form',
    'segments',
    'language_glottocode',
    'cogid_cognateset_id',
)

# Pairs handed to `from_cldf` as `namespace`; they read as
# (CLDF column name, name used inside the wordlist). The expert cognate
# sets end up under 'cog', which is the column renumbered below.
# NOTE(review): some entries name columns that are not listed in `columns`.
namespace = (
    ('concept_name', 'concept'),
    ('language_id', 'doculect'),
    ('segments', 'tokens'),
    ('language_glottocode', 'glottolog'),
    ('concept_concepticon_id', 'concepticon'),
    ('language_latitude', 'latitude'),
    ('language_longitude', 'longitude'),
    ('cognacy', 'cognacy'),
    ('cogid_cognateset_id', 'cog'),
)

# Load the dataset through the metadata file shipped with the lexibank package.
metadata_path = Dataset().cldf_dir.joinpath('cldf-metadata.json')
part = Partial.from_cldf(metadata_path, columns=columns, namespace=namespace)
part.renumber('cog')

# Type 'cogid' or 'cog' at the prompt to see a tree based on Deepadung et
# al.'s cognate judgements. The two 'lexstat...' values below trigger an
# automatic LexStat analysis first; any other answer runs no clustering here.
method = input('method: ')

if method == 'lexstatcogid':
    # Cognate detection stored in the 'lexstatcogid' column.
    part.get_scorer(runs=10000)
    part.cluster(method='lexstat', ref='lexstatcogid', threshold=0.55)
elif method == 'lexstatcogids':
    # Partial cognate detection stored in the 'lexstatcogids' column.
    part.get_partial_scorer(runs=10000)
    part.partial_cluster(
        method='lexstat',
        ref='lexstatcogids',
        threshold=0.55,
    )
from lingpy.compare.partial import Partial

# NOTE(review): `Wordlist`, `Alignments` and `Dataset` are used below but are
# not imported in this part of the file -- they must already be in scope.

# Location of the dataset's CLDF metadata file; resolved once and shared by
# both loading steps below.
var = Dataset().cldf_dir.joinpath('cldf-metadata.json')

# Sanity check: print every row whose `tokens.n` contains an empty element.
# A row is printed once per empty element found.
wl = Wordlist.from_cldf(var)
for idx, tokens in wl.iter_rows('tokens'):
    for segment in tokens.n:
        if not segment:
            print(idx, tokens)

# NOTE(review): `columns` and `namespace` are defined here but are NOT passed
# to `Partial.from_cldf` below, so the loader runs with its own defaults.
# Passing them would change which columns reach the output file -- confirm
# the intent before wiring them in.
columns = (
    'concept_name',
    'language_id',
    'value',
    'form',
    'segments',
    'language_glottocode',
    'cogid_cognateset_id',
)
namespace = (
    ('concept_name', 'concept'),
    ('language_id', 'doculect'),
    ('segments', 'tokens'),
    ('language_glottocode', 'glottolog'),
    ('concept_concepticon_id', 'concepticon'),
    ('language_latitude', 'latitude'),
    ('language_longitude', 'longitude'),
    ('cognacy', 'cognacy'),
    ('cogid_cognateset_id', 'cogid'),
)

# Partial cognate detection with LexStat, clustered with Infomap and stored
# in the 'cogids' column.
part = Partial.from_cldf(var)
part.get_partial_scorer(runs=100)  # use 100 or 1000 runs while debugging
part.partial_cluster(
    method='lexstat',
    threshold=0.5,
    ref='cogids',
    cluster_method='infomap',
)

# Align the detected partial cognate sets and write them to
# 'deepadung-wordlist' in TSV format.
alms = Alignments(part, ref='cogids')
alms.align()
alms.output(
    'tsv',
    filename='deepadung-wordlist',
    ignore='all',
    prettify=False,
)
from lingpy.convert.strings import write_nexus
from lingpy.compare.partial import Partial
from lingpy.convert.plot import plot_tree

# Load the wordlist from the CLDF metadata file (path is relative to the
# current working directory).
part = Partial.from_cldf('cldf/cldf-metadata.json')

# Compute partial cognate sets with SCA and store them in 'cogids'.
part.partial_cluster(
    method='sca',
    threshold=0.45,
    ref='cogids',
    cluster_method='upgma',
)

# Derive the 'cogid' column from 'cogids' (presumably whole-word cognate ids
# built from the partial ones -- confirm against the LingPy documentation),
# then build a UPGMA tree from it.
part.add_cognate_ids('cogids', 'cogid', idtype='strict')
part.calculate('tree', ref='cogid', tree_calc='upgma')

# Export the distance matrix: once as a SplitsTree-flavoured Nexus file and
# once in LingPy's 'dst' format.
out = write_nexus(part, mode='splitstree', filename='distance_matrix.nex')
part.output('dst', filename='distance_matrix')

# Show the tree, both as a plot and as ASCII art on stdout.
tree = part.tree
plot_tree(str(tree))
print(tree.asciiArt())

# Alternative analysis (disabled): compute cognate sets according to LexStat
# and calculate the distance matrix from those instead.
# part.get_partial_scorer(runs=1000)
# part.partial_cluster(method='lexstat', threshold=0.55, cluster_method='upgma', ref="lexstatids")
# part.add_cognate_ids('lexstatids', 'lexstatid', idtype='strict')
# part.calculate('tree', ref='lexstatid', tree_calc='upgma', force=True)
# part.output('dst', filename='distance_matrix')
# plot_tree(str(part.tree))
# print(part.tree.asciiArt())
from lingpy.compare.partial import Partial
from lingpy.align.sca import Alignments
from lexibank_chingelong import Dataset

# Load the data through the metadata file shipped with the lexibank package.
# Alternative when running from a checkout of the dataset repository:
# part = Partial.from_cldf('cldf/cldf-metadata.json')
metadata_path = Dataset().cldf_dir.joinpath('cldf-metadata.json')
part = Partial.from_cldf(metadata_path)

# Partial cognate sets according to SCA, written to the 'cogids' column.
# `method` is not passed here, so the library default applies.
part.partial_cluster(
    threshold=0.45,
    ref='cogids',
    cluster_method='upgma',
)

# Partial cognate sets according to LexStat, written to the 'lexstatids'
# column. The scorer has to be computed before clustering.
part.get_partial_scorer(runs=1000)
part.partial_cluster(
    method='lexstat',
    threshold=0.55,
    cluster_method='upgma',
    ref='lexstatids',
)

# Align the partial cognates; the alignment uses the SCA sets in 'cogids'.
alms = Alignments(part, ref='cogids')
alms.align()

# Write the aligned data to 'alignments' in TSV format.
alms.output(
    'tsv',
    filename='alignments',
    ignore='all',
    prettify=False,
)