Exemplo n.º 1
0
    def form_to_cell_value(self, form: types.Form) -> str:
        """Build a string describing the form itself.

        Provide the best transcription and all translations of the form strung
        together, in the shape ``transcription ‘concept1, concept2’``. When
        the form has a non-empty ``Comment`` value, a space and the WARNING
        marker are appended.

        Args:
            form: The form row; it is read with ``.get("Comment")`` and
                indexed by the name of the dataset's FormTable
                parameterReference column.

        Returns:
            The text describing this form.

        """
        transcription = self.get_best_transcription(form)

        # TODO: Use CLDF terms instead of column names, like the c_ elsewhere
        suffix = f" {WARNING}" if form.get("Comment") else ""

        # Corresponding concepts: the concept column may hold several concepts
        # (a list) or a single one. Flatten the list case so that joining the
        # translations below does not fail on a nested list.
        c_concept = self.dataset["FormTable", "parameterReference"].name
        concepts = form[c_concept]
        if isinstance(concepts, (list, tuple)):
            translations = list(concepts)
        else:
            translations = [concepts]

        return "{:} ‘{:}’{:}".format(transcription, ", ".join(translations),
                                     suffix)
Exemplo n.º 2
0
    def form_to_cell_value(self, form: types.Form) -> str:
        """Build a string describing the form itself

        Provide the best transcription and all translations of the form strung
        together.

        If the form has segments, the transcription is built from them: runs
        of segments selected by ``segmentSlice`` are wrapped in ``{ … }`` and
        separated by spaces, while the remaining segments are appended
        unseparated. Without segments, the plain ``form`` value is used. A
        missing ``segmentSlice`` selects the whole form silently; one that
        cannot be parsed selects the whole form and logs a warning. A
        non-empty ``formComment`` appends a space and the WARNING marker.

        >>> ds = util.fs.new_wordlist(FormTable=[], CognatesetTable=[], CognateTable=[])
        >>> E = ExcelWriter(dataset=ds)
        >>> E.form_to_cell_value({"form": "f", "parameterReference": "c"})
        'f ‘c’'
        >>> E.form_to_cell_value(
        ...   {"form": "f", "parameterReference": "c", "formComment": "Not empty"})
        'f ‘c’ ⚠'
        >>> E.form_to_cell_value(
        ...   {"form": "fo", "parameterReference": "c", "segments": ["f", "o"]})
        '{ f o } ‘c’'
        >>> E.form_to_cell_value(
        ...   {"form": "fo",
        ...    "parameterReference": "c",
        ...    "segments": ["f", "o"],
        ...    "segmentSlice": ["1:1"]})
        '{ f }o ‘c’'

        TODO: This function should at some point support alignments, so that
        the following call will return '{ - f - }o ‘c’' instead.

        >>> E.form_to_cell_value(
        ...   {"form": "fo",
        ...    "parameterReference": "c",
        ...    "segments": ["f", "o"],
        ...    "segmentSlice": ["1:1"],
        ...    "alignment": ["", "f", ""]})
        '{ f }o ‘c’'

        """
        segments = form.get("segments")
        if not segments:
            transcription = form["form"]
        else:
            # TODO: use CLDF property instead of column name
            whole_form = range(len(segments))
            included_segments: t.Iterable[int]
            try:
                # Look the slice up first, so that a missing key is detected
                # before the parser is involved at all.
                slices = form["segmentSlice"]
                included_segments = set(
                    parse_segment_slices(slices, enforce_ordered=True))
            except KeyError:
                included_segments = whole_form
            except TypeError:
                # The form may lack "cognateReference" or "id" (plain forms
                # do), so read them with .get(): emitting the warning must
                # not itself raise a KeyError.
                self.logger.warning(
                    "In judgement %s, for form %s, there was no segment slice. I will use the whole form.",
                    form.get("cognateReference"),
                    form.get("id"),
                )
                included_segments = whole_form
            except ValueError:
                # What if segments overlap or cross? Overlap shouldn't happen,
                # but we don't check here. Crossing might happen, but this
                # serialization cannot reflect it, so we enforce order,
                # expecting that an error message here will be more useful than
                # silently messing with data. If the check fails, we take the
                # whole segment and warn.
                self.logger.warning(
                    "In judgement %s, for form %s, segment slice %s is invalid. I will use the whole form.",
                    form.get("cognateReference"),
                    form.get("id"),
                    ",".join(map(str, slices)),
                )
                included_segments = whole_form

            # Collect the pieces and join once instead of growing a string.
            pieces = []
            included = False
            for i, s in enumerate(segments):
                selected = i in included_segments
                if included and not selected:
                    # Leaving a selected run: close the bracket first.
                    pieces.append(" }" + s)
                elif selected and not included:
                    # Entering a selected run: open a bracket.
                    pieces.append("{ " + s)
                elif selected:
                    pieces.append(" " + s)
                else:
                    pieces.append(s)
                included = selected
            if included:
                pieces.append(" }")

            transcription = "".join(pieces).strip()

        suffix = f" {WARNING}" if form.get("formComment") else ""

        # Corresponding concepts: either a list (multiple concepts) or a
        # single value (single concept).
        concepts = form["parameterReference"]
        if isinstance(concepts, list):
            translations = list(concepts)
        else:
            translations = [concepts]
        return "{:} ‘{:}’{:}".format(transcription, ", ".join(translations),
                                     suffix)