Exemplo n.º 1
0
    def _write_data(self, fp):
        """Write the data section of the ARFF file.

        The data are the tags of the annotations, or distributions of
        probabilities, written one instance per line:

        * Each instance is terminated by a newline.
        * The attribute values of an instance are separated by commas,
          in the order in which they were declared in the header.
        * Missing values are represented by a single question mark.
        * Values of string and nominal attributes are case sensitive,
          and any that contain a space must be quoted.

        :param fp: FileDescriptor

        """
        fp.write(b("@DATA\n"))

        for point, class_str in self._fix_instance_steps():
            # Attribute values come first; the class value closes the line.
            fields = list(self._fix_data_instance(point))
            fields.append(str(class_str))
            fp.write(b(",".join(fields) + "\n"))
Exemplo n.º 2
0
    def write(self, filename):
        """Write a XRFF file.

        The content is fully validated before the file is opened, so that
        a validation failure does not create or truncate the target file.

        :param filename: (str)
        :raises: AioNoTiersError if there is nothing to write

        """
        if self.is_empty() is True:
            raise AioNoTiersError(self.default_extension)

        # Check metadata
        self.check_metadata()

        # Check the annotation tags.
        self.validate_annotations()

        # Check if the metadata are properly fixed.
        self.validate()

        # OK, we are ready to write. The context manager closes the file,
        # including when one of the writers raises.
        with codecs.open(filename, 'w', sg.__encoding__, buffering=8096) as fp:

            fp.write(b('<?xml version="1.0" encoding="utf-8"?>\n'))
            fp.write(b("\n"))
            # <dataset> is the root element: it must enclose the header
            # and the body, otherwise the document has several roots and
            # is not well-formed XML.
            fp.write(b('<dataset name="{:s}">\n'.format(self.get_name())))
            fp.write(b("\n"))
            fp.write(b('<header>\n'))
            self._write_attributes(fp)
            fp.write(b('</header>\n'))
            fp.write(b('\n'))
            fp.write(b('<body>\n'))
            self._write_instances(fp)
            fp.write(b('</body>\n'))
            fp.write(b('\n'))
            fp.write(b('</dataset>\n'))
Exemplo n.º 3
0
    def test_regexp(self):
        """Tag content matches the regular expression."""
        tag = sppasTag("abc")
        compare = self.tc.regexp

        self.assertTrue(compare(tag, "^a[a-z]"))
        self.assertFalse(compare(tag, "d"))

        # A plain string given instead of a tag is expected to be rejected.
        with self.assertRaises(TypeError):
            compare("abc", b("B"))
Exemplo n.º 4
0
    def save_tips(self, filename=TIPS_FILE):
        """Save the message tips in a file, one tip per line.

        :param filename: (str) Name of the file to store message tips.

        """
        with codecs.open(filename, 'w', encoding) as stream:
            for tip in self._tips:
                encoded = b(tip)
                stream.write("{:s}\n".format(encoded))
Exemplo n.º 5
0
    def save_tips(self, filename=TIPS_FILE):
        """Save the message tips in a file, one tip per line.

        :param filename: (str) Name of the file to store message tips.

        """
        with codecs.open(filename, 'w', encoding) as stream:
            for tip in self._tips:
                encoded = b(tip)
                stream.write("{:s}\n".format(encoded))
Exemplo n.º 6
0
    def test_icontains(self):
        """Tag contains the text (case-insensitive)."""
        tag = sppasTag("abc")
        compare = self.tc.icontains

        self.assertTrue(compare(tag, u("B")))
        self.assertFalse(compare(tag, u("d")))

        # A plain string given instead of a tag is expected to be rejected.
        with self.assertRaises(TypeError):
            compare("abc", u("B"))
        # A byte string given instead of a text is expected to be rejected.
        with self.assertRaises(TypeError):
            compare(tag, b("d"))
Exemplo n.º 7
0
    def test_iendswith(self):
        """Tag ends with the text (case-insensitive)."""
        tag = sppasTag("abc")
        compare = self.tc.iendswith

        self.assertTrue(compare(tag, u("C")))
        self.assertFalse(compare(tag, u("b")))

        # A plain string given instead of a tag is expected to be rejected.
        with self.assertRaises(TypeError):
            compare("abc", u("C"))
        # A byte string given instead of a text is expected to be rejected.
        with self.assertRaises(TypeError):
            compare(tag, b("b"))
Exemplo n.º 8
0
    def test_startswith(self):
        """Tag starts with the text (case-sensitive)."""
        tag = sppasTag("abc")
        compare = self.tc.startswith

        self.assertTrue(compare(tag, u("a")))
        self.assertFalse(compare(tag, u("b")))

        # A plain string given instead of a tag is expected to be rejected.
        with self.assertRaises(TypeError):
            compare("abc", u("a"))
        # A byte string given instead of a text is expected to be rejected.
        with self.assertRaises(TypeError):
            compare(tag, b("b"))
Exemplo n.º 9
0
    def test_iexact(self):
        """Tag equals the text (case-insensitive)."""
        tag = sppasTag("abc")
        compare = self.tc.iexact

        self.assertTrue(compare(tag, u("ABC")))
        self.assertFalse(compare(tag, u("AAA")))

        # A plain string given instead of a tag is expected to be rejected.
        with self.assertRaises(TypeError):
            compare("abc", u("ABC"))
        # A byte string given instead of a text is expected to be rejected.
        with self.assertRaises(TypeError):
            compare(tag, b("ABC"))
Exemplo n.º 10
0
    def _write_attributes(self, fp):
        """Write the <attributes> section of the header.

        One attribute set is written for each tier that
        sppasWEKA._tier_is_attribute() reports as an attribute:

        * a numeric tier gives one numeric attribute per tag of its
          controlled vocabulary, named "tiername-tagcontent", except
          for the uncertain annotation tag;
        * a non-numeric tier with more tags than the allowed maximum
          gives a single nominal attribute without labels;
        * any other tier gives a nominal attribute with one label per
          tag of its controlled vocabulary.

        The class tier is written last, with is_class=True.

        It is supposed that the transcription has been already validated.

        :param fp: FileDescriptor

        """
        fp.write(b('    <attributes>\n'))
        for tier in self:

            is_att, is_numeric = sppasWEKA._tier_is_attribute(tier)
            if is_att is False:
                continue

            if is_numeric is True:
                # Tags will be converted to probabilities
                for tag in tier.get_ctrl_vocab():
                    # Do not write an uncertain label in that situation.
                    if tag.get_content() != self._uncertain_annotation_tag:
                        attribute_name = \
                            tier.get_name() + "-" + tag.get_content()
                        fp.write(
                            b('        <attribute name="{:s}" '
                              'type="numeric" />\n'
                              ''.format(attribute_name)))

            elif len(tier.get_ctrl_vocab()) > self._max_attributes_tags:
                # Too many tags to list them explicitly.
                # NOTE(review): a generic "string" type may have been
                # intended here instead of a nominal one without labels
                # -- to be confirmed.
                fp.write(
                    b('        <attribute name="{:s}" '
                      'type="nominal" />\n'
                      ''.format(tier.get_name())))

            else:
                # The controlled vocabulary: written by the same helper as
                # the class tier so that both get the same layout (every
                # chunk passed through b(), newline after the opening tag).
                self._write_attribute_ctrl_vocab(tier, fp, is_class=False)

        tier = self._get_class_tier()
        self._write_attribute_ctrl_vocab(tier, fp, is_class=True)

        fp.write(b('    </attributes>\n'))
Exemplo n.º 11
0
    def save_tips(self, filename=None):
        """Save the message tips in a file, one tip per line.

        :param filename: (str) Name of the file to store message tips.
        If None, "tips.txt" in the paths.etc folder is used.

        """
        target = filename
        if target is None:
            target = os.path.join(paths.etc, "tips.txt")

        with codecs.open(target, 'w', sg.__encoding__) as stream:
            for tip in self._tips:
                encoded = b(tip)
                stream.write("{:s}\n".format(encoded))
Exemplo n.º 12
0
    def prepend_silence(self, nframes):
        """Create n frames of silence and put them before the frames.

        :param nframes: (int) the number of frames of silence to prepend
        :returns: (bool) False if nframes is not strictly positive (the
        frames are left unchanged), True otherwise

        """
        count = int(nframes)
        if count > 0:
            # NOTE(review): the repeated unit is a space followed by a NUL
            # byte, not two NUL bytes -- confirm that this is intended.
            silence = b(" \x00") * count
            self._frames = silence + self._frames
            return True

        return False
Exemplo n.º 13
0
    def _write_instances(self, fp):
        """Write the <instances> section of the XRFF file.

        Each instance holds one <value> element per attribute, followed
        by one <value> element for its class. The values are the tags of
        the annotations or distributions of probabilities.

        :param fp: FileDescriptor

        """
        value_line = "            <value>{!s:s}</value>\n"

        fp.write(b("    <instances>\n"))
        for point, class_str in self._fix_instance_steps():
            # Attribute values come first; the class value closes the
            # instance.
            values = list(self._fix_data_instance(point))
            values.append(class_str)

            fp.write(b("        <instance>\n"))
            for value in values:
                fp.write(b(value_line.format(value)))
            fp.write(b("        </instance>\n"))

        fp.write(b("    </instances>\n"))
Exemplo n.º 14
0
    def _write_attribute_ctrl_vocab(tier, fp, is_class=False):
        """Write a nominal attribute with the labels of a controlled vocabulary.

        The attribute is named after the tier and gets one label for each
        tag of the controlled vocabulary of the tier.

        :param tier: (sppasTier)
        :param fp: FileDescription
        :param is_class: (boolean) Add class="yes" to the attribute

        """
        emit = fp.write
        label_line = "            <label>{:s}</label>\n"

        # The opening tag is written piecewise so that the optional class
        # flag is placed before the name.
        emit(b('        <attribute '))
        if is_class is True:
            emit(b('class="yes" '))
        attribute_name = tier.get_name()
        emit(b('name="{:s}" type="nominal">\n'.format(attribute_name)))

        emit(b('            <labels>\n'))
        for tag in tier.get_ctrl_vocab():
            emit(b(label_line.format(tag.get_content())))
        emit(b('            </labels>\n'))
        emit(b('        </attribute>\n'))
Exemplo n.º 15
0
 def __str__(self):
     """Return the content followed by the type between parentheses."""
     content = b(self.get_content())
     content_type = self.get_type()
     return "{!s:s} ({!s:s})".format(content, content_type)
Exemplo n.º 16
0
 def __repr__(self):
     """Return "Tag: " followed by the content and the type."""
     content = b(self.get_content())
     content_type = self.get_type()
     return "Tag: {!s:s},{!s:s}".format(content, content_type)