Exemplo n.º 1
0
    def __init__(self, dictfilename, unkstamp=u"UNK", nodump=False):
        """
        Create a pronunciation dictionary, optionally filled from a file.

        When a file name is given and dumps are enabled, a dump of the
        dictionary is tried first. If no dump data is obtained, the ASCII
        file is loaded and, when dumps are enabled, the result is dumped.

        @param dictfilename is the dictionary file name (HTK-ASCII format),
        or None to keep the dictionary empty
        @param unkstamp is the string used to represent a missing pronunciation
        @param nodump (Boolean) the dump file is read and written only when
        this value is exactly False

        """
        # Marker used for entries that are missing in the dictionary
        # (the so-called unknown entries).
        self._unkstamp = unkstamp
        self._filename = dictfilename

        # Storage of the pronunciations.
        self._dict = {}

        # No file given: nothing to load.
        if dictfilename is None:
            return

        # Identity test: only the value False itself enables the dump.
        use_dump = nodump is False

        cached = rutils.load_from_dump(dictfilename) if use_dump else None
        if cached is not None:
            self._dict = cached
            logging.info('Get dictionary from dumped file.')
            return

        # No dump data (dump disabled, or load_from_dump returned None):
        # read the ASCII file, which is expected to fill self._dict.
        self.load_from_ascii(dictfilename)
        if use_dump:
            rutils.save_as_dump(self._dict, dictfilename)
        logging.info('Get dictionary from ASCII file.')
Exemplo n.º 2
0
    def __init__(self, filename=None, nodump=False, casesensitive=False):
        """
        Create a WordsList instance.

        When a file name is given and dumps are enabled, a dump of the list
        is tried first. If no dump data is obtained, the ASCII file is loaded
        and, when dumps are enabled, the result is dumped.

        @param filename (str) is the word list file name, i.e. a file with 1 column.
        @param nodump (Boolean) allows to disable the use of a dump file:
        the dump is read and written only when this value is exactly False.
        @param casesensitive (Boolean) the list of word is case-sensitive or not

        """
        # A list would be enough, but a dictionary is used because it is
        # faster to read tokens from a file and to find a token in it.
        self._stw = {}

        # Set the list of word to be case-sensitive or not.
        # This is assigned before any loading takes place.
        self.casesensitive = casesensitive

        if filename is not None:

            data = None
            if nodump is False:
                # Try first to get the list from a dump file. The dump is
                # skipped when nodump is set, so that a caller who disabled
                # dumps never gets data from a previously written dump.
                # NOTE(review): the dump is looked up from filename only;
                # whether it accounts for casesensitive is not visible
                # here -- confirm in rutils.
                data = rutils.load_from_dump(filename)

            if data is None:
                # No dump data (dump disabled, or load_from_dump returned
                # None): read the ASCII file, which is expected to fill
                # self._stw.
                self.load_from_ascii(filename)
                if nodump is False:
                    rutils.save_as_dump(self._stw, filename)
                logging.info('Got word list from ASCII file.')

            else:
                self._stw = data
                logging.info('Got word list from dumped file.')
Exemplo n.º 3
0
    def __init__(self, dictfilename=None, nodump=False):
        """
        Create a dictionary, optionally filled from a file.

        When a file name is given and dumps are enabled, a dump of the
        dictionary is tried first. If no dump data is obtained, the ASCII
        file is loaded and, when dumps are enabled, the result is dumped.

        @param dictfilename is the dictionary file name (2 columns), or None
        to keep the dictionary empty
        @param nodump (Boolean) the dump file is read and written only when
        this value is exactly False

        """
        self._dict = {}

        # No file given: nothing to load.
        if dictfilename is None:
            return

        # Identity test: only the value False itself enables the dump.
        use_dump = nodump is False

        cached = None
        if use_dump:
            cached = rutils.load_from_dump(dictfilename)

        if cached is not None:
            self._dict = cached
            return

        # No dump data (dump disabled, or load_from_dump returned None):
        # read the ASCII file, which is expected to fill self._dict.
        self.load_from_ascii(dictfilename)
        if use_dump:
            rutils.save_as_dump(self._dict, dictfilename)
Exemplo n.º 4
0
    def __init__(self, filename=None, nodump=False):
        """
        Create a new WordsList instance, optionally filled from a file.

        When a file name is given and dumps are enabled, a dump of the list
        is tried first. If no dump data is obtained, the ASCII file is loaded
        and, when dumps are enabled, the result is dumped.

        @param filename is the word list file name (1 column), or None to
        keep the list empty
        @param nodump (Boolean) the dump file is read and written only when
        this value is exactly False

        """
        # A list would be enough, but a dictionary is used because it is
        # faster to read tokens from a file and to find a token in it.
        self._stw = {}

        # No file given: nothing to load.
        if filename is None:
            return

        # Identity test: only the value False itself enables the dump.
        use_dump = nodump is False

        cached = rutils.load_from_dump(filename) if use_dump else None
        if cached is None:
            # No dump data (dump disabled, or load_from_dump returned
            # None): read the ASCII file, expected to fill self._stw.
            self.load_from_ascii(filename)
            if use_dump:
                rutils.save_as_dump(self._stw, filename)
            logging.info('Get word list from ASCII file.')
            return

        self._stw = cached
        logging.info('Get word list from dumped file.')
Exemplo n.º 5
0
    def __init__(self, dictfilename=None, nodump=False):
        """
        Create a replacements dictionary, optionally filled from a file.

        When a file name is given and dumps are enabled, a dump of the
        dictionary is tried first. If no dump data is obtained, the ASCII
        file is loaded and, when dumps are enabled, the result is dumped.

        @param dictfilename is the dictionary file name (2 columns), or None
        to keep the dictionary empty
        @param nodump (Boolean) the dump file is read and written only when
        this value is exactly False

        """
        # Name of the file the dictionary comes from (may be None).
        self._filename = dictfilename

        # The replacements dictionary.
        self._dict = {}

        # No file given: nothing to load.
        if dictfilename is None:
            return

        # Identity test: only the value False itself enables the dump.
        use_dump = nodump is False

        cached = None
        if use_dump:
            cached = rutils.load_from_dump(dictfilename)

        if cached is not None:
            self._dict = cached
            logging.info('Get dictionary from dumped file.')
            return

        # No dump data (dump disabled, or load_from_dump returned None):
        # read the ASCII file, which is expected to fill self._dict.
        self.load_from_ascii(dictfilename)
        if use_dump:
            rutils.save_as_dump(self._dict, dictfilename)
        logging.info('Get dictionary from ASCII file.')
Exemplo n.º 6
0
    def __init__(self, filename=None, nodump=True):
        """
        Create an instance, optionally filled from a file.

        When a file name is given and dumps are enabled, a dump of the
        dictionary is tried first. If no dump data is obtained, the ASCII
        file is loaded and, when dumps are enabled, the result is dumped.

        @param filename is the name of the file to load, or None to keep
        the dictionary empty
        @param nodump (Boolean) the dump file is read and written only when
        this value is exactly False; dumps are disabled by default

        """
        self._sum = 0
        self._dict = {}

        # No file given: nothing to load.
        if filename is None:
            return

        # Identity test: only the value False itself enables the dump.
        use_dump = nodump is False

        cached = rutils.load_from_dump(filename) if use_dump else None
        if cached is not None:
            # Only the dictionary is restored from the dump; _sum is left
            # at its initial value here.
            self._dict = cached
            return

        # No dump data (dump disabled, or load_from_dump returned None):
        # read the ASCII file, which is expected to fill self._dict.
        self.load_from_ascii(filename)
        if use_dump:
            rutils.save_as_dump(self._dict, filename)