import itertools
import logging
import re
import tempfile
from pathlib import Path

import openpyxl as op
import pycldf
import pytest
from pycldf.dataset import SchemaError

from lexedata import util
import lexedata.util.fs  # noqa: F401 -- makes util.fs available below
from lexedata.exporter.cognates import ExcelWriter
from lexedata.importer.cognates import import_cognates_from_excel

# The exact home of the following helpers is an assumption: they are clearly
# expected by these tests, but may live elsewhere in the code base. The
# fixtures `cldf_wordlist` and `working_and_nonworking_bibfile` come from the
# local conftest.py.
from lexedata.util import get_dataset
from lexedata.edit.add_singleton_cognatesets import create_singletons
from helper_functions import (
    copy_to_temp,
    empty_copy_of_cldf_wordlist,
    properties_as_key,
)


def test_adding_central_concepts():
    dataset = pycldf.Dataset.from_metadata(
        Path(__file__).parent / "data/cldf/smallmawetiguarani/cldf-metadata.json"
    )
    dirname = Path(tempfile.mkdtemp(prefix="lexedata-test"))
    excel_writer = ExcelWriter(dataset=dataset, add_central_concepts=True)
    output = dirname / "out.xlsx"
    excel_writer.create_excel(out=output)

    # Load the central concepts from the output.
    ws = op.load_workbook(output).active
    concept_index = 0
    for row in ws.iter_rows(min_row=1, max_row=1):
        for cell in row:
            if cell.value == "Central_Concept":
                concept_index = cell.column
    # When accessing a row as a tuple, the index is 0-based, unlike Excel's
    # 1-based columns.
    central_concepts = [
        row[concept_index - 1].value for row in ws.iter_rows(min_row=2)
    ]
    assert central_concepts == [
        "one",
        "one",
        "one",
        "one",
        "two",
        "three",
        "two",
        "three",
        "four_1",
        "four",
        "five",
    ]
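

# The first-row header scan above recurs in several tests below. A small
# helper like this (a sketch, not part of lexedata's API) would capture the
# pattern:
def _column_of(ws, header):
    """Return the 1-based Excel column index of the header cell `header`."""
    for cell in next(ws.iter_rows(min_row=1, max_row=1)):
        if cell.value == header:
            return cell.column
    raise KeyError(header)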


def test_roundtrip(cldf_wordlist, working_and_nonworking_bibfile):
    filled_cldf_wordlist = working_and_nonworking_bibfile(cldf_wordlist)
    dataset, target = filled_cldf_wordlist
    c_formReference = dataset["CognateTable", "formReference"].name
    c_cogsetReference = dataset["CognateTable", "cognatesetReference"].name
    old_judgements = {
        (row[c_formReference], row[c_cogsetReference])
        for row in dataset["CognateTable"].iterdicts()
    }
    writer = ExcelWriter(dataset, database_url="https://example.org/lexicon/{:}")
    forms = util.cache_table(filled_cldf_wordlist[0])
    languages = util.cache_table(filled_cldf_wordlist[0], "LanguageTable").values()
    judgements = util.cache_table(filled_cldf_wordlist[0], "CognateTable").values()
    cogsets = util.cache_table(filled_cldf_wordlist[0], "CognatesetTable").values()
    writer.create_excel(
        rows=cogsets, judgements=judgements, forms=forms, languages=languages
    )
    # Reset the existing cognatesets and cognate judgements to avoid
    # interference with the data in the Excel file.
    dataset["CognateTable"].write([])
    dataset["CognatesetTable"].write([])
    import_cognates_from_excel(writer.ws, dataset)
    new_judgements = {
        (row[c_formReference], row[c_cogsetReference])
        for row in dataset["CognateTable"].iterdicts()
    }
    assert new_judgements == old_judgements
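

# A note on util.cache_table, used above and throughout: judging from its use
# in these tests, it reads one table of the dataset (the FormTable by default)
# into a dict mapping row IDs to row dicts keyed by CLDF property terms such
# as "form", "parameterReference" and "segments". create_excel() takes the
# form cache as a mapping but the other tables as sequences of rows, hence
# the .values() calls.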


def test_adding_singleton_cognatesets():
    dataset = pycldf.Dataset.from_metadata(
        Path(__file__).parent / "data/cldf/smallmawetiguarani/cldf-metadata.json"
    )
    dirname = Path(tempfile.mkdtemp(prefix="lexedata-test"))
    excel_writer = ExcelWriter(dataset=dataset, singleton_cognate=True)
    output = dirname / "out.xlsx"
    excel_writer.create_excel(out=output)

    # Load the cognateset IDs from the output.
    ws = op.load_workbook(output).active
    cogset_index = 0
    for row in ws.iter_rows(min_row=1, max_row=1):
        for cell in row:
            if cell.value == "CogSet":
                cogset_index = cell.column
    # When accessing a row as a tuple, the index is 0-based, unlike Excel's
    # 1-based columns.
    cogset_ids = [row[cogset_index - 1].value for row in ws.iter_rows(min_row=2)]
    assert cogset_ids == [
        "one1",
        "one1",
        "one2",
        "one6",
        "two1",
        "three1",
        "two8",
        "three9",
        "four1",
        "four8",
        "five5",
        "X1_old_paraguayan_guarani",
        "X2_ache",
        "X3_paraguayan_guarani",
    ]


def test_roundtrip_separator_column(cldf_wordlist):
    """Test whether a CognatesetTable column with a separator survives a roundtrip."""
    dataset, target = copy_to_temp(cldf_wordlist)
    dataset.add_columns("CognatesetTable", "CommaSeparatedTags")
    dataset["CognatesetTable", "CommaSeparatedTags"].separator = ","
    c_id = dataset["CognatesetTable", "id"].name

    write_back = list(dataset["CognatesetTable"])
    tags = []
    for tag, row in zip(
        itertools.cycle(
            [
                ["two", "tags"],
                ["single-tag"],
                [],
                ["tag;containing;other;separator"],
            ]
        ),
        write_back,
    ):
        tags.append((row[c_id], tag))
        row["CommaSeparatedTags"] = tag
    dataset.write(CognatesetTable=write_back)

    writer = ExcelWriter(dataset)
    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")
    writer.create_excel(out_filename)
    import_cognates_from_excel(out_filename, dataset)

    reread_tags = [
        (c[c_id], c["CommaSeparatedTags"]) for c in dataset["CognatesetTable"]
    ]
    reread_tags.sort(key=lambda x: x[0])
    tags.sort(key=lambda x: x[0])
    assert reread_tags == tags


def test_toexcel_runs(cldf_wordlist):
    filled_cldf_wordlist = copy_to_temp(cldf_wordlist)
    writer = ExcelWriter(
        dataset=filled_cldf_wordlist[0],
        database_url=str(filled_cldf_wordlist[1]),
    )
    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")
    writer.create_excel(out_filename)


def test_no_comment_column():
    dataset, _ = copy_to_temp(
        Path(__file__).parent / "data/cldf/smallmawetiguarani/cldf-metadata.json"
    )
    dataset.remove_columns("FormTable", "comment")
    writer = ExcelWriter(dataset=dataset)
    forms = util.cache_table(dataset).values()
    # Checking the first form is enough: without a comment column, the cell
    # value must still render cleanly.
    form = next(iter(forms))
    assert writer.form_to_cell_value(form).strip() == "{ e t a k ɾ ã } ‘one, one’"


def test_adding_singleton_cognatesets_with_status(caplog):
    dataset = get_dataset(
        Path(__file__).parent / "data/cldf/smallmawetiguarani/cldf-metadata.json"
    )
    dataset.add_columns("CognatesetTable", "Status_Column")
    with caplog.at_level(logging.WARNING):
        excel_writer = ExcelWriter(dataset=dataset)
        cogsets, judgements = create_singletons(
            dataset,
            status="NEW",
            by_segment=True,
        )
        properties_as_key(cogsets, dataset["CognatesetTable"].tableSchema.columns)
        properties_as_key(judgements, dataset["CognateTable"].tableSchema.columns)
        forms = util.cache_table(dataset)
        languages = util.cache_table(dataset, "LanguageTable").values()
        excel_writer.create_excel(
            rows=cogsets, judgements=judgements, forms=forms, languages=languages
        )
    assert re.search("no Status_Column to write", caplog.text) is None

    status_index = 0
    for row in excel_writer.ws.iter_rows(min_row=1, max_row=1):
        for cell in row:
            if cell.value == "Status_Column":
                # When accessing a row as a tuple, the index is 0-based,
                # unlike Excel's 1-based columns.
                status_index = cell.column - 1
    status = [
        row[status_index].value for row in excel_writer.ws.iter_rows(min_row=2)
    ]
    # Eleven original cognatesets without a status, followed by four singletons.
    assert status == [None] * 11 + ["NEW"] * 4


def test_adding_singleton_cognatesets(caplog):
    dataset = get_dataset(
        Path(__file__).parent / "data/cldf/smallmawetiguarani/cldf-metadata.json"
    )
    with caplog.at_level(logging.WARNING):
        excel_writer = ExcelWriter(dataset=dataset)
        cogsets, judgements = create_singletons(
            dataset,
            status="NEW",
            by_segment=False,
        )
        properties_as_key(cogsets, dataset["CognatesetTable"].tableSchema.columns)
        properties_as_key(judgements, dataset["CognateTable"].tableSchema.columns)
        forms = util.cache_table(dataset)
        languages = util.cache_table(dataset, "LanguageTable").values()
        excel_writer.create_excel(
            rows=cogsets, judgements=judgements, forms=forms, languages=languages
        )
    assert re.search("No Status_Column", caplog.text)

    # Load the cognateset IDs from the output.
    cogset_index = 0
    for row in excel_writer.ws.iter_rows(min_row=1, max_row=1):
        for cell in row:
            if cell.value == "CogSet":
                # When accessing a row as a tuple, the index is 0-based,
                # unlike Excel's 1-based columns.
                cogset_index = cell.column - 1
    cogset_ids = [
        row[cogset_index].value for row in excel_writer.ws.iter_rows(min_row=2)
    ]
    assert cogset_ids == [
        "one1",
        "one1",
        "one2",
        "one6",
        "two1",
        "three1",
        "two8",
        "three9",
        "four1",
        "four8",
        "five5",
        "X_old_paraguayan_guarani_two_1",
        "X_paraguayan_guarani_five_1",
    ]
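

# Read together, the two singleton tests suggest the shape of
# create_singletons(): it appears to return a pair (cogsets, judgements) in
# which every form not yet covered by a judgement gets its own "X..."
# singleton cognateset; with by_segment=True the split is finer (four
# singletons in test_adding_singleton_cognatesets_with_status versus two in
# the test just above), and a Status_Column, where present, is filled with
# the given `status` ("NEW").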


def test_missing_required_column():
    dataset, _ = copy_to_temp(
        Path(__file__).parent / "data/cldf/smallmawetiguarani/cldf-metadata.json"
    )
    dataset.remove_columns("FormTable", "ID")
    # TODO: switch to pycldf.dataset.SchemaError
    with pytest.raises(KeyError):
        excel_writer = ExcelWriter(dataset=dataset)
        forms = util.cache_table(dataset)
        languages = util.cache_table(dataset, "LanguageTable").values()
        judgements = util.cache_table(dataset, "CognateTable")
        cogsets = util.cache_table(dataset, "CognatesetTable")
        excel_writer.create_excel(
            rows=cogsets, judgements=judgements, forms=forms, languages=languages
        )


def test_included_segments(caplog):
    ds = util.fs.new_wordlist(FormTable=[], CognatesetTable=[], CognateTable=[])
    E = ExcelWriter(dataset=ds)
    E.form_to_cell_value({"form": "f", "parameterReference": "c"})
    with caplog.at_level(logging.WARNING):
        cell = E.form_to_cell_value(
            {
                "id": "0",
                "cognateReference": "j",
                "form": "fo",
                "parameterReference": "c",
                "segments": ["f", "o"],
                "segmentSlice": ["3:1"],
            }
        )
    assert cell == "{ f o } ‘c’"
    assert re.search("segment slice '3:1' is invalid", caplog.text) is None
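

# Together with test_no_comment_column, the assertions above pin down the
# cell format that form_to_cell_value() produces: "{ <segments> } ‘<concept>’";
# in this test the full segment list is rendered even though the segment
# slice "3:1" is backwards.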


def test_no_cognate_table(caplog):
    dataset, _ = empty_copy_of_cldf_wordlist(
        Path(__file__).parent / "data/cldf/smallmawetiguarani/cldf-metadata.json"
    )
    dataset.remove_table("CognateTable")
    with pytest.raises(SystemExit):
        ExcelWriter(dataset=dataset)
    assert "presupposes a separate CognateTable" in caplog.text
    assert "lexedata.edit.add_cognate_table" in caplog.text


def test_toexcel_runs(cldf_wordlist, working_and_nonworking_bibfile):
    filled_cldf_wordlist = working_and_nonworking_bibfile(cldf_wordlist)
    writer = ExcelWriter(
        dataset=filled_cldf_wordlist[0],
        database_url=str(filled_cldf_wordlist[1]),
    )
    forms = util.cache_table(filled_cldf_wordlist[0])
    languages = util.cache_table(filled_cldf_wordlist[0], "LanguageTable").values()
    judgements = util.cache_table(filled_cldf_wordlist[0], "CognateTable").values()
    cogsets = util.cache_table(filled_cldf_wordlist[0], "CognatesetTable").values()
    writer.create_excel(
        rows=cogsets, judgements=judgements, forms=forms, languages=languages
    )
    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")
    writer.wb.save(filename=out_filename)


def test_no_cognateset_table(caplog):
    dataset, _ = empty_copy_of_cldf_wordlist(
        Path(__file__).parent / "data/cldf/smallmawetiguarani/cldf-metadata.json"
    )
    dataset.remove_table("CognatesetTable")
    # TODO: Should this be a SystemExit or a dataset SchemaError?
    with pytest.raises((SystemExit, SchemaError)) as exc_info:
        ExcelWriter(dataset=dataset)
    if exc_info.type == SystemExit:
        assert "presupposes a separate CognatesetTable" in caplog.text
        assert "lexedata.edit.add_table" in caplog.text


def test_roundtrip(cldf_wordlist):
    filled_cldf_wordlist = copy_to_temp(cldf_wordlist)
    dataset, target = filled_cldf_wordlist
    c_formReference = dataset["CognateTable", "formReference"].name
    c_cogsetReference = dataset["CognateTable", "cognatesetReference"].name
    old_judgements = {
        (row[c_formReference], row[c_cogsetReference])
        for row in dataset["CognateTable"].iterdicts()
    }
    writer = ExcelWriter(dataset)
    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")
    writer.create_excel(out_filename)
    # Reset the existing cognatesets and cognate judgements to avoid
    # interference with the data in the Excel file.
    dataset["CognateTable"].write([])
    dataset["CognatesetTable"].write([])
    import_cognates_from_excel(out_filename, dataset)
    new_judgements = {
        (row[c_formReference], row[c_cogsetReference])
        for row in dataset["CognateTable"].iterdicts()
    }
    assert new_judgements == old_judgements


def test_roundtrip_separator_column(cldf_wordlist, working_and_nonworking_bibfile):
    """Test whether a CognatesetTable column with a separator survives a roundtrip."""
    dataset, target = working_and_nonworking_bibfile(cldf_wordlist)
    dataset.add_columns("CognatesetTable", "CommaSeparatedTags")
    dataset["CognatesetTable", "CommaSeparatedTags"].separator = ","
    c_id = dataset["CognatesetTable", "id"].name

    write_back = list(dataset["CognatesetTable"])
    tags = []
    for tag, row in zip(
        itertools.cycle(
            [
                ["two", "tags"],
                ["single-tag"],
                [],
                ["tag;containing;other;separator"],
            ]
        ),
        write_back,
    ):
        tags.append((row[c_id], tag))
        row["CommaSeparatedTags"] = tag
    dataset.write(CognatesetTable=write_back)

    writer = ExcelWriter(dataset, database_url="https://example.org/lexicon/{:}")
    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")
    forms = util.cache_table(dataset)
    languages = util.cache_table(dataset, "LanguageTable").values()
    judgements = util.cache_table(dataset, "CognateTable").values()
    cogsets = util.cache_table(dataset, "CognatesetTable").values()
    writer.create_excel(
        rows=cogsets, judgements=judgements, forms=forms, languages=languages
    )
    import_cognates_from_excel(writer.ws, dataset)

    reread_tags = [
        (c[c_id], c["CommaSeparatedTags"]) for c in dataset["CognatesetTable"]
    ]
    reread_tags.sort(key=lambda x: x[0])
    tags.sort(key=lambda x: x[0])
    assert reread_tags == tags


def test_cell_comments_export():
    dataset, _ = copy_to_temp(
        Path(__file__).parent / "data/cldf/minimal/cldf-metadata.json"
    )
    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")
    writer = ExcelWriter(dataset, database_url="https://example.org/lexicon/{:}")
    forms = util.cache_table(dataset)
    languages = sorted(
        util.cache_table(dataset, "LanguageTable").values(), key=lambda x: x["name"]
    )
    judgements = util.cache_table(dataset, "CognateTable").values()
    cogsets = util.cache_table(dataset, "CognatesetTable").values()
    writer.create_excel(
        rows=cogsets, judgements=judgements, forms=forms, languages=languages
    )

    # Exhaust the iterator; after the loop, `col` is the last column.
    for col in writer.ws.iter_cols():
        pass
    assert (
        col[-1].comment and col[-1].comment.content
    ), "Last row of last column should contain a judgement with a comment attached."
    assert (
        col[-1].comment.content == "A judgement comment"
    ), "Comment should match the comment from the cognate table."


def test_cell_comments_export():
    dataset, _ = copy_to_temp(
        Path(__file__).parent / "data/cldf/minimal/cldf-metadata.json"
    )
    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")

    E = ExcelWriter(dataset)
    E.set_header()
    E.create_excel(out_filename, size_sort=False, language_order="Name")

    ws_out = op.load_workbook(out_filename).active
    # Exhaust the iterator; after the loop, `col` is the last column.
    for col in ws_out.iter_cols():
        pass
    assert col[-1].comment.content, (
        "Last row of last column should contain a judgement with a comment attached."
    )
    assert (
        col[-1].comment.content == "A judgement comment"
    ), "Comment should match the comment from the cognate table."
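

# To run just this module from a lexedata source checkout (the file name is
# an assumption):
#
#     pytest -x test/test_excel_cognate_export.py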