Example #1
def test_adding_central_concepts():
    dataset = pycldf.Dataset.from_metadata(
        Path(__file__).parent /
        "data/cldf/smallmawetiguarani/cldf-metadata.json")
    dirname = Path(tempfile.mkdtemp(prefix="lexedata-test"))
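    # export to Excel with add_central_concepts, so each cognate set gets its central concept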
    excel_writer = ExcelWriter(dataset=dataset, add_central_concepts=True)
    output = dirname / "out.xlsx"
    excel_writer.create_excel(out=output)
    # load central concepts from output
    ws = op.load_workbook(output).active
    concept_index = 0
    for row in ws.iter_rows(min_row=1, max_row=1):
        for cell in row:
            if cell.value == "Central_Concept":
                concept_index = cell.column
    # row tuples are 0-indexed, unlike Excel's 1-based columns
    central_concepts = [
        row[concept_index - 1].value for row in ws.iter_rows(min_row=2)
    ]
    assert central_concepts == [
        "one",
        "one",
        "one",
        "one",
        "two",
        "three",
        "two",
        "three",
        "four_1",
        "four",
        "five",
    ]
Example #2
def test_roundtrip(cldf_wordlist, working_and_nonworking_bibfile):
    filled_cldf_wordlist = working_and_nonworking_bibfile(cldf_wordlist)
    dataset, target = filled_cldf_wordlist
    c_formReference = dataset["CognateTable", "formReference"].name
    c_cogsetReference = dataset["CognateTable", "cognatesetReference"].name
    old_judgements = {(row[c_formReference], row[c_cogsetReference])
                      for row in dataset["CognateTable"].iterdicts()}
    writer = ExcelWriter(dataset,
                         database_url="https://example.org/lexicon/{:}")
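    # cache the tables the writer needs (util.cache_table maps row IDs to rows)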
    forms = util.cache_table(filled_cldf_wordlist[0])
    languages = util.cache_table(filled_cldf_wordlist[0],
                                 "LanguageTable").values()
    judgements = util.cache_table(filled_cldf_wordlist[0],
                                  "CognateTable").values()
    cogsets = util.cache_table(filled_cldf_wordlist[0],
                               "CognatesetTable").values()
    writer.create_excel(rows=cogsets,
                        judgements=judgements,
                        forms=forms,
                        languages=languages)

    # Reset the existing cognatesets and cognate judgements, to avoid
    # interference with the data in the Excel file
    dataset["CognateTable"].write([])
    dataset["CognatesetTable"].write([])

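    # read the judgements back directly from the in-memory worksheet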
    import_cognates_from_excel(writer.ws, dataset)

    new_judgements = {(row[c_formReference], row[c_cogsetReference])
                      for row in dataset["CognateTable"].iterdicts()}

    assert new_judgements == old_judgements
Example #3
def test_adding_singleton_cognatesets():
    dataset = pycldf.Dataset.from_metadata(
        Path(__file__).parent /
        "data/cldf/smallmawetiguarani/cldf-metadata.json")
    dirname = Path(tempfile.mkdtemp(prefix="lexedata-test"))
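    # export with singleton_cognate, creating a cognate set for each form not yet in one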
    excel_writer = ExcelWriter(dataset=dataset, singleton_cognate=True)
    output = dirname / "out.xlsx"
    excel_writer.create_excel(out=output)
    # load cognate set IDs from output
    ws = op.load_workbook(output).active
    cogset_index = 0
    for row in ws.iter_rows(min_row=1, max_row=1):
        for cell in row:
            if cell.value == "CogSet":
                cogset_index = cell.column
    # row tuples are 0-indexed, unlike Excel's 1-based columns
    cogset_ids = [
        row[cogset_index - 1].value for row in ws.iter_rows(min_row=2)
    ]
    assert cogset_ids == [
        "one1",
        "one1",
        "one2",
        "one6",
        "two1",
        "three1",
        "two8",
        "three9",
        "four1",
        "four8",
        "five5",
        "X1_old_paraguayan_guarani",
        "X2_ache",
        "X3_paraguayan_guarani",
    ]
Example #4
def test_roundtrip_separator_column(cldf_wordlist):
    """Test whether a CognatesetTable column with separator survives a roundtrip."""
    dataset, target = copy_to_temp(cldf_wordlist)
    dataset.add_columns("CognatesetTable", "CommaSeparatedTags")
    dataset["CognatesetTable", "CommaSeparatedTags"].separator = ","
    c_id = dataset["CognatesetTable", "id"].name

    write_back = list(dataset["CognatesetTable"])
    tags = []
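    # cycle through tag lists, including an empty one and a tag containing a different separator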
    for tag, row in zip(
            itertools.cycle([["two", "tags"], ["single-tag"], [],
                             ["tag;containing;other;separator"]]),
            write_back,
    ):
        tags.append((row[c_id], tag))
        row["CommaSeparatedTags"] = tag
    dataset.write(CognatesetTable=write_back)

    writer = ExcelWriter(dataset)
    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")
    writer.create_excel(out_filename)

    import_cognates_from_excel(out_filename, dataset)

    reread_tags = [(c[c_id], c["CommaSeparatedTags"])
                   for c in dataset["CognatesetTable"]]
    reread_tags.sort(key=lambda x: x[0])
    tags.sort(key=lambda x: x[0])
    assert reread_tags == tags
Example #5
def test_toexcel_runs(cldf_wordlist):
    filled_cldf_wordlist = copy_to_temp(cldf_wordlist)
    writer = ExcelWriter(
        dataset=filled_cldf_wordlist[0],
        database_url=str(filled_cldf_wordlist[1]),
    )
    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")
    writer.create_excel(out_filename)
Example #6
def test_no_comment_column():
    dataset, _ = copy_to_temp(
        Path(__file__).parent /
        "data/cldf/smallmawetiguarani/cldf-metadata.json")
    dataset.remove_columns("FormTable", "comment")
    writer = ExcelWriter(dataset=dataset)
    forms = util.cache_table(dataset).values()
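    # only the first form needs checking, hence the break below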
    for form in forms:
        assert writer.form_to_cell_value(
            form).strip() == "{ e t a k ɾ ã } ‘one, one’"
        break
Example #7
def test_adding_singleton_cognatesets_with_status(caplog):
    dataset = get_dataset(
        Path(__file__).parent /
        "data/cldf/smallmawetiguarani/cldf-metadata.json")
    dataset.add_columns("CognatesetTable", "Status_Column")
    with caplog.at_level(logging.WARNING):
        excel_writer = ExcelWriter(dataset=dataset)
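        # create singleton cognate sets (one per unassigned segment) marked with status "NEW"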
        cogsets, judgements = create_singletons(
            dataset,
            status="NEW",
            by_segment=True,
        )
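        # re-key the singleton rows by the tables' column properties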
        properties_as_key(cogsets,
                          dataset["CognatesetTable"].tableSchema.columns)
        properties_as_key(judgements,
                          dataset["CognateTable"].tableSchema.columns)
        forms = util.cache_table(dataset)
        languages = util.cache_table(dataset, "LanguageTable").values()
        excel_writer.create_excel(rows=cogsets,
                                  judgements=judgements,
                                  forms=forms,
                                  languages=languages)
    assert re.search("no Status_Column to write", caplog.text) is None

    cogset_index = 0
    for row in excel_writer.ws.iter_rows(min_row=1, max_row=1):
        for cell in row:
            if cell.value == "Status_Column":
                cogset_index = cell.column - 1
    # row tuples are 0-indexed, unlike Excel's 1-based columns
    status = [
        row[cogset_index].value for row in excel_writer.ws.iter_rows(min_row=2)
    ]
    assert status == [
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        "NEW",
        "NEW",
        "NEW",
        "NEW",
    ]
Example #8
def test_adding_singleton_cognatesets(caplog):
    dataset = get_dataset(
        Path(__file__).parent /
        "data/cldf/smallmawetiguarani/cldf-metadata.json")
    with caplog.at_level(logging.WARNING):
        excel_writer = ExcelWriter(dataset=dataset)
        cogsets, judgements = create_singletons(
            dataset,
            status="NEW",
            by_segment=False,
        )
        properties_as_key(cogsets,
                          dataset["CognatesetTable"].tableSchema.columns)
        properties_as_key(judgements,
                          dataset["CognateTable"].tableSchema.columns)
        forms = util.cache_table(dataset)
        languages = util.cache_table(dataset, "LanguageTable").values()
        excel_writer.create_excel(rows=cogsets,
                                  judgements=judgements,
                                  forms=forms,
                                  languages=languages)
    assert re.search("No Status_Column", caplog.text)

    # load cognate set IDs from output
    cogset_index = 0
    for row in excel_writer.ws.iter_rows(min_row=1, max_row=1):
        for cell in row:
            if cell.value == "CogSet":
                cogset_index = cell.column - 1
    # row tuples are 0-indexed, unlike Excel's 1-based columns
    cogset_ids = [
        row[cogset_index].value for row in excel_writer.ws.iter_rows(min_row=2)
    ]
    assert cogset_ids == [
        "one1",
        "one1",
        "one2",
        "one6",
        "two1",
        "three1",
        "two8",
        "three9",
        "four1",
        "four8",
        "five5",
        "X_old_paraguayan_guarani_two_1",
        "X_paraguayan_guarani_five_1",
    ]
Example #9
def test_missing_required_column():
    dataset, _ = copy_to_temp(
        Path(__file__).parent /
        "data/cldf/smallmawetiguarani/cldf-metadata.json")
    dataset.remove_columns("FormTable", "ID")
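    # exporting a FormTable without its required ID column should fail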
    # TODO: switch to pycldf.dataset.SchemaError
    with pytest.raises(KeyError):
        excel_writer = ExcelWriter(dataset=dataset)
        forms = util.cache_table(dataset)
        languages = util.cache_table(dataset, "LanguageTable").values()
        judgements = util.cache_table(dataset, "CognateTable")
        cogsets = util.cache_table(dataset, "CognatesetTable")
        excel_writer.create_excel(rows=cogsets,
                                  judgements=judgements,
                                  forms=forms,
                                  languages=languages)
Example #10
def test_included_segments(caplog):
    ds = util.fs.new_wordlist(FormTable=[],
                              CognatesetTable=[],
                              CognateTable=[])
    E = ExcelWriter(dataset=ds)
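    # a minimal form without segments should render without raising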
    E.form_to_cell_value({"form": "f", "parameterReference": "c"})
    with caplog.at_level(logging.WARNING):
        cell = E.form_to_cell_value({
            "id": "0",
            "cognateReference": "j",
            "form": "fo",
            "parameterReference": "c",
            "segments": ["f", "o"],
            "segmentSlice": ["3:1"],
        })
        assert cell == "{ f o } ‘c’"

    assert re.search("segment slice '3:1' is invalid", caplog.text)
Example #11
def test_no_cognate_table(caplog):
    dataset, _ = empty_copy_of_cldf_wordlist(
        Path(__file__).parent /
        "data/cldf/smallmawetiguarani/cldf-metadata.json")
    dataset.remove_table("CognateTable")
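    # without a CognateTable, the writer should exit and point to the helper module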
    with pytest.raises(SystemExit):
        ExcelWriter(dataset=dataset)
    assert "presupposes a separate CognateTable" in caplog.text
    assert "lexedata.edit.add_cognate_table" in caplog.text
Example #12
def test_toexcel_runs(cldf_wordlist, working_and_nonworking_bibfile):
    filled_cldf_wordlist = working_and_nonworking_bibfile(cldf_wordlist)
    writer = ExcelWriter(
        dataset=filled_cldf_wordlist[0],
        database_url=str(filled_cldf_wordlist[1]),
    )
    forms = util.cache_table(filled_cldf_wordlist[0])
    languages = util.cache_table(filled_cldf_wordlist[0],
                                 "LanguageTable").values()
    judgements = util.cache_table(filled_cldf_wordlist[0],
                                  "CognateTable").values()
    cogsets = util.cache_table(filled_cldf_wordlist[0],
                               "CognatesetTable").values()
    writer.create_excel(rows=cogsets,
                        judgements=judgements,
                        forms=forms,
                        languages=languages)
    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")
    writer.wb.save(filename=out_filename)
Example #13
def test_no_cognateset_table(caplog):
    dataset, _ = empty_copy_of_cldf_wordlist(
        Path(__file__).parent /
        "data/cldf/smallmawetiguarani/cldf-metadata.json")
    dataset.remove_table("CognatesetTable")
    # TODO: SystemExit or dataset error?
    with pytest.raises((SystemExit, SchemaError)) as exc_info:
        ExcelWriter(dataset=dataset)
    if exc_info.type == SystemExit:
        assert "presupposes a separate CognatesetTable" in caplog.text
        assert "lexedata.edit.add_table" in caplog.text
Example #14
def test_roundtrip(cldf_wordlist):
    filled_cldf_wordlist = copy_to_temp(cldf_wordlist)
    dataset, target = filled_cldf_wordlist
    c_formReference = dataset["CognateTable", "formReference"].name
    c_cogsetReference = dataset["CognateTable", "cognatesetReference"].name
    old_judgements = {(row[c_formReference], row[c_cogsetReference])
                      for row in dataset["CognateTable"].iterdicts()}
    writer = ExcelWriter(dataset)
    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")
    writer.create_excel(out_filename)

    # Reset the existing cognatesets and cognate judgements, to avoid
    # interference with the data in the Excel file
    dataset["CognateTable"].write([])
    dataset["CognatesetTable"].write([])

    import_cognates_from_excel(out_filename, dataset)

    new_judgements = {(row[c_formReference], row[c_cogsetReference])
                      for row in dataset["CognateTable"].iterdicts()}

    assert new_judgements == old_judgements
Example #15
def test_roundtrip_separator_column(cldf_wordlist,
                                    working_and_nonworking_bibfile):
    """Test whether a CognatesetTable column with separator survives a roundtrip."""
    dataset, target = working_and_nonworking_bibfile(cldf_wordlist)
    dataset.add_columns("CognatesetTable", "CommaSeparatedTags")
    dataset["CognatesetTable", "CommaSeparatedTags"].separator = ","
    c_id = dataset["CognatesetTable", "id"].name

    write_back = list(dataset["CognatesetTable"])
    tags = []
    for tag, row in zip(
            itertools.cycle([["two", "tags"], ["single-tag"], [],
                             ["tag;containing;other;separator"]]),
            write_back,
    ):
        tags.append((row[c_id], tag))
        row["CommaSeparatedTags"] = tag
    dataset.write(CognatesetTable=write_back)

    writer = ExcelWriter(dataset,
                         database_url="https://example.org/lexicon/{:}")
    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")
    forms = util.cache_table(dataset)
    languages = util.cache_table(dataset, "LanguageTable").values()
    judgements = util.cache_table(dataset, "CognateTable").values()
    cogsets = util.cache_table(dataset, "CognatesetTable").values()
    writer.create_excel(rows=cogsets,
                        judgements=judgements,
                        forms=forms,
                        languages=languages)

    import_cognates_from_excel(writer.ws, dataset)

    reread_tags = [(c[c_id], c["CommaSeparatedTags"])
                   for c in dataset["CognatesetTable"]]
    reread_tags.sort(key=lambda x: x[0])
    tags.sort(key=lambda x: x[0])
    assert reread_tags == tags
Example #16
def test_cell_comments_export():
    dataset, _ = copy_to_temp(
        Path(__file__).parent / "data/cldf/minimal/cldf-metadata.json")
    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")

    writer = ExcelWriter(dataset,
                         database_url="https://example.org/lexicon/{:}")
    forms = util.cache_table(dataset)
    languages = sorted(util.cache_table(dataset, "LanguageTable").values(),
                       key=lambda x: x["name"])
    judgements = util.cache_table(dataset, "CognateTable").values()
    cogsets = util.cache_table(dataset, "CognatesetTable").values()
    writer.create_excel(rows=cogsets,
                        judgements=judgements,
                        forms=forms,
                        languages=languages)

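    # run the loop to its end so that col holds the last column of the sheet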
    for col in writer.ws.iter_cols():
        pass
    assert (
        col[-1].comment and col[-1].comment.content
    ), "Last row of last column should contain a judgement, with a comment attached to it."
    assert (col[-1].comment.content == "A judgement comment"
            ), "Comment should match the comment from the cognate table"
Example #17
def test_cell_comments_export():
    dataset, _ = copy_to_temp(
        Path(__file__).parent / "data/cldf/minimal/cldf-metadata.json")
    _, out_filename = tempfile.mkstemp(".xlsx", "cognates")

    E = ExcelWriter(dataset)
    E.set_header()
    E.create_excel(out_filename, size_sort=False, language_order="Name")

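    # re-load the saved workbook to inspect the exported cells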
    ws_out = openpyxl.load_workbook(out_filename).active
    for col in ws_out.iter_cols():
        pass
    assert (
        col[-1].comment.content
    ), "Last row of last column should contain a judgement, with a comment attached to it."
    assert (col[-1].comment.content == "A judgement comment"
            ), "Comment should match the comment from the cognate table"