示例#1
0
 def __append_input(self, base_fn, base_ext):
     """Register base_fn as an input if one of its files exists.

     Each extension of base_ext is appended in turn to base_fn. As soon
     as sppasFileUtils reports an existing file for one of them, and
     base_fn is not already stored in self._inputs, base_fn is appended
     to self._inputs.

     :param base_fn: File name without its extension
     :param base_ext: Extensions to try, in order
     :returns: (bool) True if base_fn was appended, False otherwise

     """
     for extension in base_ext:
         candidate = sppasFileUtils(base_fn + extension)
         if not candidate.exists():
             continue
         if base_fn in self._inputs:
             continue
         self._inputs.append(base_fn)
         return True
     return False
示例#2
0
文件: ipustrs.py 项目: lym0302/sppas
    def extract_aligned(self, trstier, nametier):
        """Collect the tracks and the silences of a time-aligned tier.

        Consecutive silence annotations are merged into one silence
        interval. Any other annotation is a track: its label value is
        stored in self._units. If a tier of names is given, a track for
        which no name annotation is found is discarded; the names of the
        other tracks are stored in self._names, each one being unique.

        :param trstier: Tier with the time-aligned transcription
        :param nametier: Tier with the names of the tracks, or None
        :returns: a tuple with tracks and silences lists

        """
        tracks = list()
        silences = list()
        self._units = []
        self._names = []

        nb_ann = trstier.GetSize()
        pos = 0
        while pos < nb_ann:
            current = trstier[pos]

            if current.GetLabel().IsSilence():
                begin = current.GetLocation().GetBegin().GetMidpoint()
                finish = current.GetLocation().GetEnd().GetMidpoint()
                # Absorb every silence that immediately follows this one,
                # so that only one interval separates two tracks.
                while pos + 1 < nb_ann:
                    follower = trstier[pos + 1]
                    if not follower.GetLabel().IsSilence():
                        break
                    finish = follower.GetLocation().GetEnd().GetMidpoint()
                    pos += 1
                silences.append([begin, finish])

            else:
                location = current.GetLocation()
                begin = location.GetBegin().GetMidpoint()
                finish = location.GetEnd().GetMidpoint()
                tracks.append([begin, finish])
                self._units.append(current.GetLabel().GetValue())

                if nametier is not None:
                    found = nametier.Find(begin, finish, True)
                    if len(found) == 0:
                        # No name for this track: forget what was just added
                        tracks.pop()
                        self._units.pop()
                    else:
                        raw_name = found[0].GetLabel().GetValue()
                        base = sppasFileUtils(raw_name).clear_whitespace()
                        if len(base) == 0:
                            base = "unnamed_track"
                        # A name already in use gets a numbered suffix
                        candidate = base
                        counter = 2
                        while candidate in self._names:
                            candidate = u"%s_%.06d" % (base, counter)
                            counter += 1
                        self._names.append(candidate)

            # Next annotation
            pos += 1

        return tracks, silences
示例#3
0
    def fix_workingdir(inputaudio=""):
        """Create a working directory that holds a copy of the audio file.

        Random directory names are requested from sppasFileUtils until one
        that does not exist yet is obtained. The directory is then created
        and the audio file is copied into it, under the name returned by
        sppasFileUtils.format() for the base name of the audio file.

        :param inputaudio: (str) Audio file name
        :returns: (str) Name of the working directory that was created

        """
        name_generator = sppasFileUtils()
        workdir = name_generator.set_random()
        while os.path.exists(workdir):
            workdir = name_generator.set_random()

        # Name of the copy: the formatted base name of the audio file
        base_name = os.path.basename(inputaudio)
        copy_name = sppasFileUtils(base_name).format()

        os.mkdir(workdir)
        destination = os.path.join(workdir, copy_name)
        shutil.copy(inputaudio, destination)

        return workdir
示例#4
0
    def extract_aligned(self, trstier, nametier):
        """Extract tracks and silences from a time-aligned transcription.

        Consecutive silence annotations are concatenated into one silence
        interval. Any other annotation is a track, and its label value is
        stored in self._units. If a tier of names is given:

            - a track without any name annotation is ignored;
            - an empty name is replaced by "unnamed_track";
            - a name already in use gets a numbered suffix, so that all
              the entries of self._names are different.

        :param trstier: Tier with the time-aligned transcription
        :param nametier: Tier with the names of the tracks, or None
        :returns: a tuple with tracks and silences lists

        """
        trstracks = []
        silences = []
        self._units = []
        self._names = []

        i = 0
        last = trstier.GetSize()
        while i < last:
            # Set the current annotation values
            ann = trstier[i]
            start = ann.GetLocation().GetBegin().GetMidpoint()
            end = ann.GetLocation().GetEnd().GetMidpoint()

            if ann.GetLabel().IsSilence():
                # Concatenate all the silences between 2 tracks
                while (i + 1) < last and trstier[i + 1].GetLabel().IsSilence():
                    i += 1
                    end = trstier[i].GetLocation().GetEnd().GetMidpoint()
                silences.append([start, end])

            else:
                trstracks.append([start, end])
                self._units.append(ann.GetLabel().GetValue())

                if nametier is not None:
                    aname = nametier.Find(start, end, True)
                    if len(aname) == 0:
                        # No name for this track: it is ignored
                        trstracks.pop()
                        self._units.pop()
                    else:
                        sf = sppasFileUtils(aname[0].GetLabel().GetValue())
                        filename = sf.clear_whitespace()
                        if len(filename) == 0:
                            filename = "unnamed_track"
                        # Two tracks must never share the same name:
                        # the second one would replace the first one.
                        new_name = filename
                        idx = 2
                        while new_name in self._names:
                            new_name = "%s_%06d" % (filename, idx)
                            idx += 1
                        self._names.append(new_name)

            # Continue
            i += 1

        return trstracks, silences
示例#5
0
def fix_audioinput(inputaudioname):
    """Return the audio file name that will be used.

    The given name is formatted with sppasFileUtils.format(); the original
    documentation states the aim is an only-ascii-based file name without
    whitespace. If the formatted name differs from the given one, the
    audio file is copied under the formatted name.

    :param inputaudioname: (str) Audio file name
    :returns: (str) The formatted audio file name

    """
    formatted = sppasFileUtils(inputaudioname).format()
    if formatted == inputaudioname:
        # The name already fits: nothing to copy
        return formatted

    shutil.copy(inputaudioname, formatted)
    return formatted
示例#6
0
def fix_audioinput(inputaudioname):
    """Fix the name of the audio file to work on.

    sppasFileUtils.format() gives the expected name (an only-ascii-based
    name without whitespace, according to the original documentation).
    The audio file is copied under that name whenever it is not the name
    that was given.

    :param inputaudioname: (str) Audio file name
    :returns: (str) The formatted audio file name

    """
    normalized = sppasFileUtils(inputaudioname).format()
    needs_copy = normalized != inputaudioname
    if needs_copy:
        shutil.copy(inputaudioname, normalized)

    return normalized
示例#7
0
    def read(self, filename):
        """Read an ANT file and fill the Transcription.

        The file is opened as a zip archive and extracted into a directory
        whose name is given by sppasFileUtils().set_random(). The extracted
        "annotation.xml" file is then read with sppasANTX and the result is
        assigned to this object with set().

        :param filename: (str) Name of the ANT file

        """
        # The "with" statement closes the archive even if the extraction
        # fails, so the file handle is never leaked.
        with zipfile.ZipFile(filename, 'r') as zf:
            unzip_dir = sppasFileUtils().set_random()
            zf.extractall(unzip_dir)

        # NOTE(review): the extracted directory is not removed here --
        # confirm whether something else is in charge of deleting it.
        antx_filename = os.path.join(unzip_dir, "annotation.xml")
        antx = sppasANTX()
        antx.read(antx_filename)
        self.set(antx)
示例#8
0
    def _get_filename(self, filename, extensions):
        """Search for an existing file among the expected extensions.

        The extension of filename is replaced by each of the given ones,
        in order. The first candidate that sppasFileUtils reports as
        existing, and that is a regular file, is returned.

        :param filename: input file name
        :param extensions: the list of expected extension
        :returns: a file name of the first existing file with an expected
        extension or None

        """
        for ext in extensions:
            root = os.path.splitext(filename)[0]
            candidate = sppasFileUtils(root + ext).exists()
            if candidate is None:
                continue
            if os.path.isfile(candidate):
                return candidate

        return None
示例#9
0
    def _get_filename(filename, extensions):
        """Search for an existing file among the expected extensions.

        The extension of filename is replaced by each of the given ones,
        in order. The first candidate that sppasFileUtils reports as
        existing, and that is a regular file, is returned.

        :param filename: input file name
        :param extensions: the list of expected extension
        :returns: a file name of the first existing file with an expected
        extension or None

        """
        root, _ = os.path.splitext(filename)
        for ext in extensions:
            found = sppasFileUtils(root + ext).exists()
            if found is None:
                continue
            if os.path.isfile(found):
                return found

        return None
示例#10
0
def fix_workingdir(inputaudio=""):
    """Create and return a working directory to store temporarily the data.

    Without an audio file name, random directory names are requested from
    sppasFileUtils until one that does not exist yet is obtained.
    Otherwise, the directory is named after the audio file: its name
    without the extension, followed by "-temp". If that name is already in
    use, a counter is appended ("-temp1", "-temp2", ...) so that an
    existing directory does not make os.mkdir fail.

    :param inputaudio: (str) Audio file name. An empty value (or None)
    means that a random directory name is used.
    :returns: (str) Name of the directory that was created
    :raises OSError: if the directory can't be created

    """
    if not inputaudio:
        # Notice that the following generates a directory that the
        # aligners won't be able to access under Windows.
        # No problem with MacOS or Linux.
        sf = sppasFileUtils()
        workdir = sf.set_random()
        while os.path.exists(workdir):
            workdir = sf.set_random()
    else:
        base = os.path.splitext(inputaudio)[0] + "-temp"
        workdir = base
        # A previous run may have left its directory: pick a free name
        counter = 1
        while os.path.exists(workdir):
            workdir = base + str(counter)
            counter += 1

    os.mkdir(workdir)
    return workdir
示例#11
0
def fix_workingdir(inputaudio=""):
    """Create and return a working directory to store temporarily the data.

    With an audio file name, the directory is named after it: the name
    without its extension followed by "-temp", then by a counter starting
    at 1 as long as the resulting name already exists. Without an audio
    file name, random names are requested from sppasFileUtils until one
    that does not exist yet is obtained.

    :param inputaudio: (str) Audio file name
    :returns: (str) Name of the directory that was created

    """
    if len(inputaudio) > 0:
        stem = os.path.splitext(inputaudio)[0] + "-temp"
        workdir = stem
        suffix = 1
        while os.path.exists(workdir):
            workdir = stem + str(suffix)
            suffix += 1
    else:
        # Notice that the following generates a directory that the
        # aligners won't be able to access under Windows.
        # No problem with MacOS or Linux.
        name_generator = sppasFileUtils()
        workdir = name_generator.set_random()
        while os.path.exists(workdir):
            workdir = name_generator.set_random()

    os.mkdir(workdir)
    return workdir
示例#12
0
    def run_ipusegmentation(self, stepidx):
        """Execute the SPPAS-IPUSegmentation program.

        For each ".wav" file of the list of files:

            - if a segmentation exists in another format than the expected
              one, it is exported to the expected output format;
            - if the output file does not exist, the segmentation is
              performed, with the ".txt" transcription if there is one;
            - if the output file exists, it is only split into multiple
              files when the 'dirtracks' option is enabled.

        Messages are sent to the log file only if there is one.

        :param stepidx: index of the step in the parameters
        :returns: number of files processed successfully

        """
        # Initializations
        step = self.parameters.get_step(stepidx)
        stepname = self.parameters.get_step_name(stepidx)
        files_processed_success = 0
        self._progress.set_header(stepname)
        self._progress.update(0,"")

        # Get the list of input file names, with the ".wav" extension
        filelist = self.set_filelist(".wav")
        if len(filelist) == 0:
            return 0
        total = len(filelist)

        # Create annotation instance, and fix options
        try:
            seg = sppasIPUseg(self._logfile)
        except Exception as e:
            if self._logfile is not None:
                self._logfile.print_message("%s\n"%str(e), indent=1,status=4)
            return 0

        # Extensions of the files in which an IPU-seg could already exist.
        # It does not depend on the file to process: it is fixed only once.
        ext = [known_ext
               for known_ext in sppas.src.annotationdata.aio.extensions_in
               if known_ext not in ['.txt','.hz', '.PitchTier']]

        # Execute the annotation for each file in the list
        for i,f in enumerate(filelist):

            # fix the default values
            seg.reset()
            seg.fix_options(step.get_options())

            # Indicate the file to be processed
            if self._logfile is not None:
                self._logfile.print_message(stepname+" of file "+f, indent=1)
            self._progress.set_text(os.path.basename(f)+" ("+str(i+1)+"/"+str(total)+")")

            # Fix input/output file name
            outname = os.path.splitext(f)[0] + self.parameters.get_output_format()

            # Is there already an existing IPU-seg (in any format)!
            existoutname = self._get_filename(f, ext)

            # it's existing... but not in the expected format: convert!
            if existoutname is not None and existoutname != outname:
                # just copy the file!
                if self._logfile is not None:
                    self._logfile.print_message('Export '+existoutname, indent=2)
                    self._logfile.print_message('into '+outname, indent=2)
                try:
                    t = sppas.src.annotationdata.aio.read(existoutname)
                    sppas.src.annotationdata.aio.write(outname,t)
                    # OK, now outname is as expected! (or not...)
                except Exception:
                    # Best effort only: if the export failed, outname does
                    # not exist and the segmentation is performed below.
                    pass

            # Execute annotation
            tgfname = sppasFileUtils(outname).exists()
            if tgfname is None:
                # No already existing IPU seg., but perhaps a txt.
                txtfile = self._get_filename(f, [".txt"])
                if self._logfile is not None:
                    if txtfile:
                        self._logfile.print_message("A transcription was found, perform Silence/Speech segmentation time-aligned with a transcription %s"%txtfile, indent=2,status=3)
                    else:
                        self._logfile.print_message("No transcription was found, perform Silence/Speech segmentation only.", indent=2,status=3)
                try:
                    seg.run(f, trsinputfile=txtfile, ntracks=None, diroutput=None, tracksext=None, trsoutput=outname)
                    files_processed_success += 1
                    if self._logfile is not None:
                        self._logfile.print_message(outname, indent=2,status=0)
                except Exception as e:
                    if self._logfile is not None:
                        self._logfile.print_message("%s for file %s\n" % (str(e),outname), indent=2,status=-1)
            else:
                if seg.get_option('dirtracks') is True:
                    # The log file is optional: without this test, an
                    # AttributeError was raised when there is none.
                    if self._logfile is not None:
                        self._logfile.print_message("A time-aligned transcription was found, split into multiple files", indent=2,status=3)
                    try:
                        seg.run(f, trsinputfile=tgfname, ntracks=None, diroutput=None, tracksext=None, trsoutput=None)
                        files_processed_success += 1
                        if self._logfile is not None:
                            self._logfile.print_message(tgfname, indent=2,status=0)
                    except Exception as e:
                        if self._logfile is not None:
                            self._logfile.print_message("%s for file %s\n"%(str(e),tgfname), indent=2,status=-1)
                else:
                    if self._logfile is not None:
                        self._logfile.print_message("because a previous segmentation is existing.", indent=2,status=2)

            # Indicate progress
            self._progress.set_fraction(float((i+1))/float(total))
            if self._logfile is not None:
                self._logfile.print_newline()

        # Indicate completed!
        self._progress.update(1,"Completed (%d files successfully over %d files).\n"%(files_processed_success,total))
        self._progress.set_header("")

        return files_processed_success
示例#13
0
# -*- coding: utf8 -*-

import unittest
import os
import shutil

from ..label.label import Label
from ..ptime.point import TimePoint
from ..ptime.interval import TimeInterval
from ..annotation import Annotation
from ..aio.praat import TextGrid
from sppas.src.utils.fileutils import sppasFileUtils

# ---------------------------------------------------------------------------

# Name given by sppasFileUtils.set_random(); setUp creates a directory
# with this name if it does not exist.
TEMP = sppasFileUtils().set_random()
# The "data" directory located next to this module.
DATA = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")

# ---------------------------------------------------------------------------


class TestTextGrid(unittest.TestCase):
    """
    Test reader/writers of TextGrid files from Praat.
    
    """
    def setUp(self):
        """Create the TEMP directory if it does not exist."""
        if not os.path.exists(TEMP):
            os.mkdir(TEMP)

    def tearDown(self):
示例#14
0
    src.resources.tests.test_vocab.py
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

"""
import unittest
import os.path
import shutil

from sppas import RESOURCES_PATH
from sppas.src.utils.fileutils import sppasFileUtils
from sppas.src.utils.makeunicode import u
from ..vocab import sppasVocabulary

# ---------------------------------------------------------------------------

# Name given by sppasFileUtils.set_random(); setUp creates a directory
# with this name if it does not exist.
TEMP = sppasFileUtils().set_random()

# The "vocab.txt" file of the "data" directory located next to this module.
VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data",
                     "vocab.txt")
# The "ita.vocab" file of the "vocab" folder of RESOURCES_PATH.
ITA = os.path.join(RESOURCES_PATH, "vocab", "ita.vocab")
# Path of a "vocab.txt" file inside the TEMP directory.
VOCAB_TEST = os.path.join(TEMP, "vocab.txt")

# ---------------------------------------------------------------------------


class TestVocabulary(unittest.TestCase):
    def setUp(self):
        """Create the TEMP directory if it does not exist."""
        if not os.path.exists(TEMP):
            os.mkdir(TEMP)

    def tearDown(self):