def __init__(self, holder_type):
    """Create a reader that is not yet attached to any archive.

    Args:
        holder_type: Type tag handed to NewHolderByType to build the holder.
    """
    # Input object for the archive.
    self.input = Input()
    # Type of the holder, followed by the holder itself; the holder stores
    # the object we just read (if state == kHaveObject).
    self.type = holder_type
    self.holder = NewHolderByType(holder_type)
    # Current key (if state == kHaveObject).
    self.cur_key = None
    # Filled in by Open().
    self.rspecifier = self.archive_rxfilename = self.opts = None
    self.state = RandomAccessTableReaderStateType.kUninitialized
def __init__(self, holder_type):
    """Create a script-based reader with no script loaded.

    Args:
        holder_type: Type tag handed to NewHolderByType to build the holder.
    """
    # Stream used for the data files named in the script.
    self.input = Input()
    self.type = holder_type
    self.holder = NewHolderByType(holder_type)
    # Everything derived from the rspecifier is unset until Open() runs.
    self.opts = self.rspecifier = self.script_rxfilename = None
    # Script entries and their keys; populated by Open().
    self.script = self.keys = None
    # Key and data file of the most recently requested entry.
    self.key = self.data_rxfilename = None
    # Index into the script of the last successful lookup.
    self.last_found = 0
    self.state = SequentialTableReaderStateType.kUninitialized
def __init__(self, holder_type):
    """Set up an unopened sequential reader for one holder type.

    Args:
        holder_type: The holder type; passed on to NewHolderByType.
    """
    # Fields that Open() fills in from the rspecifier.
    self.rspecifier = self.opts = self.archive_rxfilename = None
    self.input = Input()
    self.type = holder_type
    self.holder = NewHolderByType(holder_type)
    # Key of the current object; set by Next().
    self.key = None
    self.state = SequentialTableReaderStateType.kUninitialized
def Open(self, rspecifier):
    """Open a reader for the given rspecifier.

    The script file is read completely, stored sorted by key in
    ``self.script`` (with the keys alone in ``self.keys``), and the reader
    is left in state kNoObject. Problems are reported through LogError.

    Args:
        rspecifier: The given rspecifier.

    Returns:
        A boolean variable indicating if the operation is successful.
    """
    # You may call Open from states kUninitialized and kError.
    # It may leave the object in any of the states.
    if self.state == RandomAccessTableReaderStateType.kNoObject or \
            self.state == RandomAccessTableReaderStateType.kHaveObject:
        # call Close() yourself to suppress this exception.
        if not self.Close():
            LogError(
                'Error closing previous input, rspecifier was \"%s\"'
                % self.rspecifier)
    self.rspecifier = rspecifier
    (rspecifier_type, rxfilename, opts) = ClassifyRspecifier(rspecifier)
    self.script_rxfilename = rxfilename
    self.opts = opts
    if rspecifier_type != RspecifierType.kScriptRspecifier:
        LogError('Invalid rspecifier type \"%s\"' % rspecifier_type)
    script_input = Input()
    if not script_input.Open(self.script_rxfilename):
        LogError('Failed opening script file \"%s\"'
                 % self.script_rxfilename)
    script = list()
    try:
        if script_input.IsBinary():
            LogError('script file should not be in binary format.')
        while True:
            line = script_input.Stream().Readline()
            if not line:
                break
            token = line.rstrip().split()
            if len(token) != 2:
                LogError('Invalid line \"%s\"' % line)
            script.append((token[0], token[1]))
    finally:
        # The script is only needed while it is being parsed; release the
        # stream whether or not parsing succeeded.
        if script_input.IsOpen():
            script_input.Close()
    # Sorted by key so that lookups can use binary search.
    self.script = sorted(script, key=itemgetter(0))
    self.keys = [key for key, _ in self.script]
    self.state = RandomAccessTableReaderStateType.kNoObject
    self.key = None
    return True
def Open(self, rspecifier):
    """Attach the reader to an archive and load its first object.

    Args:
        rspecifier: The given rspecifier.

    Returns:
        A boolean variable indicating if the operation is successful.
    """
    states = SequentialTableReaderStateType
    # A reader that is already in use gets closed first; call Close()
    # yourself to suppress the resulting message.
    if self.state != states.kUninitialized and not self.Close():
        if self.opts.permissive:
            LogWarning('Error closing previous input (only warning, '
                       'since permissive mode).')
        else:
            LogError('Error closing previous input, rspecifier was '
                     '\"%s\"' % self.rspecifier)

    self.rspecifier = rspecifier
    rspecifier_type, self.archive_rxfilename, self.opts = \
        ClassifyRspecifier(rspecifier)
    if rspecifier_type != RspecifierType.kArchiveRspecifier:
        LogError('Invalid rspecifier type \"%s\"' % rspecifier_type)

    self.input = Input()
    # The holder decides whether the stream is opened in binary or text
    # mode.
    if self.holder.IsReadInBinary():
        opened = self.input.Open(self.archive_rxfilename)
    else:
        opened = self.input.OpenTextMode(self.archive_rxfilename)
    if not opened:
        self.state = states.kUninitialized
        LogError('Failed to open stream \"%s\"' % self.archive_rxfilename)

    # Read the first entry right away.
    self.state = states.kFileStart
    self.Next()
    if self.state == states.kError:
        self.input.Close()
        self.state = states.kUninitialized
        LogError('Error beginning to read archive file \"%s\" (wrong '
                 'filename?)' % self.archive_rxfilename)
    if self.state not in (states.kHaveObject, states.kEof):
        LogError('Invalid state \"%s\"' % self.state)
    return True
def Open(self, rspecifier):
    """Attach the reader to a script file and move to its first line.

    Args:
        rspecifier: The given rspecifier.

    Returns:
        A boolean variable indicating if the operation is successful.
    """
    states = SequentialTableReaderStateType
    # You may call Open from states kUninitialized and kError.
    # It may leave the object in any of the states.
    if self.state not in (states.kUninitialized, states.kError):
        # call Close() yourself to suppress this exception.
        closed = self.Close()
        if not closed:
            LogError(
                'Error closing previous input, rspecifier was \"%s\"'
                % self.rspecifier)

    self.rspecifier = rspecifier
    rspecifier_type, self.script_rxfilename, self.opts = \
        ClassifyRspecifier(rspecifier)
    if rspecifier_type != RspecifierType.kScriptRspecifier:
        LogError('Invalid rspecifier type \"%s\"' % rspecifier_type)

    self.script_input = Input()
    if not self.script_input.Open(self.script_rxfilename):
        LogError('Failed opening script file \"%s\"'
                 % self.script_rxfilename)

    if self.script_input.IsBinary():
        self.SetErrorState()
        LogError('script file should not be in binary format.')
    else:
        self.state = states.kFileStart
        self.Next()

    # any other status, including kEof, is OK from the point of view
    # of the 'open' function (empty scp file is not inherently an
    # error).
    return self.state != states.kError
class RandomAccessTableReaderScriptImpl(object):
    """RandomAccessTableReaderScriptImpl is for random-access reading of
    archives when a script file is specified. For simplicity we just read it
    in all in one go, as it's unlikely someone would generate this from a
    pipe. In principle we could read it on-demand as for the archives, but
    this would probably be overkill.
    """

    def __init__(self, holder_type):
        """Create a reader with no script loaded.

        Args:
            holder_type: Type tag handed to NewHolderByType to build the
                holder.
        """
        self.input = Input()
        self.opts = None
        self.rspecifier = None
        self.script = None  # (key, rxfilename) pairs sorted by key.
        self.keys = None  # Keys of self.script, in the same order.
        self.script_rxfilename = None
        self.key = None
        self.type = holder_type
        self.holder = NewHolderByType(self.type)
        self.data_rxfilename = None
        self.last_found = 0  # Script index of the last successful lookup.
        self.state = RandomAccessTableReaderStateType.kUninitialized

    def Open(self, rspecifier):
        """Open a reader for the given rspecifier.

        The script file is read completely, stored sorted by key, and the
        reader is left in state kNoObject. Problems are reported through
        LogError.

        Args:
            rspecifier: The given rspecifier.

        Returns:
            A boolean variable indicating if the operation is successful.
        """
        # You may call Open from states kUninitialized and kError.
        # It may leave the object in any of the states.
        if self.IsOpen():
            # call Close() yourself to suppress this exception.
            if not self.Close():
                LogError(
                    'Error closing previous input, rspecifier was \"%s\"'
                    % self.rspecifier)
        self.rspecifier = rspecifier
        (rspecifier_type, rxfilename, opts) = ClassifyRspecifier(rspecifier)
        self.script_rxfilename = rxfilename
        self.opts = opts
        if rspecifier_type != RspecifierType.kScriptRspecifier:
            LogError('Invalid rspecifier type \"%s\"' % rspecifier_type)
        script_input = Input()
        if not script_input.Open(self.script_rxfilename):
            LogError('Failed opening script file \"%s\"'
                     % self.script_rxfilename)
        script = list()
        try:
            if script_input.IsBinary():
                LogError('script file should not be in binary format.')
            while True:
                line = script_input.Stream().Readline()
                if not line:
                    break
                token = line.rstrip().split()
                if len(token) != 2:
                    LogError('Invalid line \"%s\"' % line)
                script.append((token[0], token[1]))
        finally:
            # The script is only needed while it is being parsed; release
            # the stream whether or not parsing succeeded.
            if script_input.IsOpen():
                script_input.Close()
        # Sorted by key so that LookupKey can use binary search.
        self.script = sorted(script, key=itemgetter(0))
        self.keys = [key for key, _ in self.script]
        self.state = RandomAccessTableReaderStateType.kNoObject
        self.key = None
        return True

    def IsOpen(self):
        """Return True if the reader is in state kNoObject or kHaveObject."""
        return self.state in (RandomAccessTableReaderStateType.kNoObject,
                              RandomAccessTableReaderStateType.kHaveObject)

    def Close(self):
        """Drop the loaded script and object and return to kUninitialized.

        Returns:
            True.
        """
        if not self.IsOpen():
            LogError('Called on input that was not open.')
        # The data stream is only opened once an object has been loaded.
        if self.input.IsOpen():
            self.input.Close()
        self.holder.Clear()
        self.last_found = 0
        self.script = None
        self.keys = None
        self.key = None
        self.data_rxfilename = None
        self.state = RandomAccessTableReaderStateType.kUninitialized
        return True

    def HasKey(self, key):
        """Return whether ``key`` is available.

        In permissive mode the object is also loaded, so a key whose data
        cannot be read is not reported as present.
        """
        preload = self.opts.permissive
        return self.HasKeyInternal(key, preload)

    def Value(self, key):
        """Load (if necessary) and return the object stored under ``key``."""
        if not self.HasKeyInternal(key, True):
            LogError('Could not get item for key = %s' % key)
        return self.holder.Value()

    def HasKeyInternal(self, key, preload):
        """Look up ``key`` and optionally load its object into the holder.

        Args:
            key: Key to look up in the script.
            preload: If True, also read the object from its data file.

        Returns:
            True if the key exists (and, with ``preload``, was loaded).
        """
        if self.state == RandomAccessTableReaderStateType.kUninitialized or \
                self.state == RandomAccessTableReaderStateType.kError:
            LogError(
                'Called on RandomAccessTableReader object that is not open.')
        elif self.state == RandomAccessTableReaderStateType.kHaveObject:
            # The requested object is already in the holder.
            if key == self.key:
                return True
        if not self.LookupKey(key):
            return False
        if not preload:
            return True
        data_rxfilename = self.script[self.last_found][1]
        if self.state == RandomAccessTableReaderStateType.kHaveObject and \
                data_rxfilename != self.data_rxfilename:
            # The holder contains an object from a different file.
            self.state = RandomAccessTableReaderStateType.kNoObject
            self.holder.Clear()
        self.key = key
        self.data_rxfilename = data_rxfilename
        if self.state == RandomAccessTableReaderStateType.kNoObject:
            success = self.input.Open(self.data_rxfilename)
            if not success:
                LogError('Failed to open file \"%s\"' % self.data_rxfilename)
                return False
            if self.holder.Read(self.input.Stream(), self.input.IsBinary()):
                self.state = RandomAccessTableReaderStateType.kHaveObject
            else:
                LogError('Failed to load object from \"%s\"'
                         % self.data_rxfilename)
                return False
        return True

    def LookupKey(self, key):
        """Find ``key`` in the sorted script.

        On success ``self.last_found`` is set to the index of the matching
        script entry; on failure it is left unchanged.

        Args:
            key: Key to search for.

        Returns:
            True if the key is present in the script, False otherwise.
        """
        # Fast path: the key at the last found position, or the one right
        # after it.
        for offset in range(2):
            candidate = self.last_found + offset
            if candidate < len(self.script) and \
                    self.script[candidate][0] == key:
                self.last_found = candidate
                return True
        idx = bisect.bisect(self.keys, key) - 1
        # idx is -1 when the script is empty or the key sorts before every
        # entry; that must not wrap around to the last element.
        if idx >= 0 and self.keys[idx] == key:
            self.last_found = idx
            return True
        return False
class SequentialTableReaderArchiveImpl(object):
    """Sequential reader over the entries of a single archive."""

    def __init__(self, holder_type):
        """Set up an unopened reader for one holder type.

        Args:
            holder_type: The holder type; passed on to NewHolderByType.
        """
        # Fields that Open() fills in from the rspecifier.
        self.rspecifier = self.opts = self.archive_rxfilename = None
        self.input = Input()
        self.type = holder_type
        self.holder = NewHolderByType(holder_type)
        # Key of the current object; set by Next().
        self.key = None
        self.state = SequentialTableReaderStateType.kUninitialized

    def Open(self, rspecifier):
        """Attach the reader to an archive and load its first object.

        Args:
            rspecifier: The given rspecifier.

        Returns:
            A boolean variable indicating if the operation is successful.
        """
        states = SequentialTableReaderStateType
        # A reader that is already in use gets closed first; call Close()
        # yourself to suppress the resulting message.
        if self.state != states.kUninitialized and not self.Close():
            if self.opts.permissive:
                LogWarning('Error closing previous input (only warning, '
                           'since permissive mode).')
            else:
                LogError('Error closing previous input, rspecifier was '
                         '\"%s\"' % self.rspecifier)

        self.rspecifier = rspecifier
        rspecifier_type, self.archive_rxfilename, self.opts = \
            ClassifyRspecifier(rspecifier)
        if rspecifier_type != RspecifierType.kArchiveRspecifier:
            LogError('Invalid rspecifier type \"%s\"' % rspecifier_type)

        self.input = Input()
        # The holder decides whether the stream is opened in binary or
        # text mode.
        if self.holder.IsReadInBinary():
            opened = self.input.Open(self.archive_rxfilename)
        else:
            opened = self.input.OpenTextMode(self.archive_rxfilename)
        if not opened:
            self.state = states.kUninitialized
            LogError('Failed to open stream \"%s\"'
                     % self.archive_rxfilename)

        # Read the first entry right away.
        self.state = states.kFileStart
        self.Next()
        if self.state == states.kError:
            self.input.Close()
            self.state = states.kUninitialized
            LogError('Error beginning to read archive file \"%s\" (wrong '
                     'filename?)' % self.archive_rxfilename)
        if self.state not in (states.kHaveObject, states.kEof):
            LogError('Invalid state \"%s\"' % self.state)
        return True

    def Next(self):
        """Advance to the next archive entry, or to kEof if none is left.

        Returns:
            True.
        """
        states = SequentialTableReaderStateType
        if self.state == states.kHaveObject:
            self.holder.Clear()
        elif self.state not in (states.kFileStart, states.kFreedObject):
            LogError('Invalid state \"%s\"' % self.state)

        if self.input.Stream().Eof():
            self.state = states.kEof
            return True

        self.key = ReadToken(self.input.Stream(), self.input.IsBinary(),
                             False)
        separator = self.input.Stream().Peek(1)
        # We expect a space ' ' after the key. We also allow tab, just so we
        # can read archives generated by scripts that may not be fully aware
        # of how this format works.
        if separator not in (' ', '\t', '\n'):
            LogError('Invalid archive file format: expected space after key '
                     '\"%s\", got character \"%s\" when reading archive '
                     '\"%s\".' % (self.key, separator,
                                  self.archive_rxfilename))
        if separator != '\n':
            # Consume the space or tab.
            self.input.Stream().Read(1)

        is_binary = InitKaldiInputStream(self.input.Stream())
        loaded = self.holder.Read(self.input.Stream(), is_binary)
        if not loaded:
            self.holder.Clear()
            LogError('Failed to read object from archive \"%s\"'
                     % self.archive_rxfilename)
        self.state = states.kHaveObject
        return True

    def IsOpen(self):
        """Tell whether the reader is attached to an archive.

        Returns:
            True in states kEof, kHaveObject and kFreedObject, False in
            kUninitialized. Any other state is reported through LogError.
        """
        states = SequentialTableReaderStateType
        if self.state in (states.kEof, states.kHaveObject,
                          states.kFreedObject):
            return True
        if self.state == states.kUninitialized:
            return False
        # note: kFileStart is not a valid state for the user to call a
        # member function (we never return from a public function in
        # this state).
        LogError('Invalid state \"%s\"' % self.state)

    def Done(self):
        """Tell whether there is no current object left to consume.

        Returns:
            False in state kHaveObject, True in kEof or kError. Any other
            state is reported through LogError.
        """
        states = SequentialTableReaderStateType
        if self.state == states.kHaveObject:
            return False
        if self.state in (states.kEof, states.kError):
            # Error condition, like Eof, counts as Done(); the
            # destructor/Close() will inform the user of the error.
            return True
        LogError('Invalid state \"%s\"' % self.state)

    def Key(self):
        """Return the key of the current object."""
        if self.state != SequentialTableReaderStateType.kHaveObject:
            LogError('Invalid state \"%s\"' % self.state)
        return self.key

    def Value(self):
        """Return the current object from the holder."""
        if self.state != SequentialTableReaderStateType.kHaveObject:
            LogError('Invalid state \"%s\"' % self.state)
        return self.holder.Value()

    def Close(self):
        """Close the archive and put the reader back to kUninitialized.

        Returns:
            False if the reader was in kError, or hit kEof and closing the
            stream returned a non-zero status, unless permissive mode is
            on; True otherwise.
        """
        states = SequentialTableReaderStateType
        if not self.IsOpen():
            LogError('Called on input that was not open.')
        close_status = self.input.Close() if self.input.IsOpen() else 0
        if self.state == states.kHaveObject:
            self.holder.Clear()
        previous = self.state
        self.state = states.kUninitialized

        failed = previous == states.kError or \
            (previous == states.kEof and close_status != 0)
        if not failed:
            return True
        if not self.opts.permissive:
            return False
        LogWarning('Error state detected closing reader. Ignoring '
                   'it because you specified permissive mode.')
        return True
class RandomAccessTableReaderArchiveImplBase(object):
    """Base class for derived implementations such as unsorted/sorted/doubly
    sorted.
    """

    def __init__(self, holder_type):
        """Create a reader that is not yet attached to any archive.

        Args:
            holder_type: Type tag handed to NewHolderByType to build the
                holder.
        """
        self.input = Input()  # Input object for the archive.
        self.cur_key = None  # Current key (if state == kHaveObject).
        self.type = holder_type  # type of the holder
        # Holds the object we just read (if state == kHaveObject).
        self.holder = NewHolderByType(self.type)
        self.rspecifier = None
        self.archive_rxfilename = None
        self.opts = None
        self.state = RandomAccessTableReaderStateType.kUninitialized

    def Open(self, rspecifier):
        """Open the archive named by ``rspecifier``.

        Args:
            rspecifier: The given rspecifier; must classify as an archive
                rspecifier.

        Returns:
            True. Failures are reported through LogError.
        """
        if self.state != RandomAccessTableReaderStateType.kUninitialized:
            if not self.Close():
                LogError('Failed to close previous input \"%s\".'
                         % self.rspecifier)
        (rspecifier_type, rxfilename, opts) = ClassifyRspecifier(rspecifier)
        if rspecifier_type != RspecifierType.kArchiveRspecifier:
            LogError('Invalid rspecifier type \"%s\"' % rspecifier_type)
        self.rspecifier = rspecifier
        self.archive_rxfilename = rxfilename
        self.opts = opts
        # The holder decides whether the stream is opened in binary or text
        # mode.
        if self.holder.IsReadInBinary():
            success = self.input.Open(self.archive_rxfilename)
        else:
            success = self.input.OpenTextMode(self.archive_rxfilename)
        if not success:
            self.state = RandomAccessTableReaderStateType.kUninitialized
            LogError('Failed to open stream \"%s\"'
                     % self.archive_rxfilename)
        else:
            self.state = RandomAccessTableReaderStateType.kNoObject
        return True

    def ReadNextObject(self):
        """Read the next (key, object) pair from the archive.

        Must be called in state kNoObject. On success the key is stored in
        ``self.cur_key``, the object in the holder, and the state becomes
        kHaveObject.

        Returns:
            True if an object was read, False if the stream was at Eof (the
            state is then kEof).
        """
        if self.state != RandomAccessTableReaderStateType.kNoObject:
            LogError('Called from the wrong state \"%s\"' % self.state)
        if self.input.Stream().Eof():
            self.state = RandomAccessTableReaderStateType.kEof
            return False
        self.cur_key = ReadToken(self.input.Stream(), self.input.IsBinary(),
                                 False)
        c = self.input.Stream().Peek(1)
        # We expect a space ' ' after the key. We also allow tab, just so we
        # can read archives generated by scripts that may not be fully aware
        # of how this format works.
        if c != ' ' and c != '\t' and c != '\n':
            LogError(
                'Invalid archive file format: expected space after key '
                '\"%s\", got character \"%s\" when reading archive \"%s\".'
                % (self.cur_key, c, self.archive_rxfilename))
        if c != '\n':
            # Consume the space or tab.
            self.input.Stream().Read(1)
        binary = InitKaldiInputStream(self.input.Stream())
        if not self.holder.Read(self.input.Stream(), binary):
            self.holder.Clear()
            LogError('Failed to read object from archive \"%s\"'
                     % self.archive_rxfilename)
        self.state = RandomAccessTableReaderStateType.kHaveObject
        return True

    def IsOpen(self):
        """Tell whether the reader is attached to an archive.

        Returns:
            True in states kEof, kError, kHaveObject and kNoObject, False in
            kUninitialized. Any other state is reported through LogError.
        """
        if self.state == RandomAccessTableReaderStateType.kEof or \
                self.state == RandomAccessTableReaderStateType.kError or \
                self.state == RandomAccessTableReaderStateType.kHaveObject or \
                self.state == RandomAccessTableReaderStateType.kNoObject:
            return True
        elif self.state == RandomAccessTableReaderStateType.kUninitialized:
            return False
        else:
            LogError('Invalid state \"%s\"' % self.state)

    def CloseInternal(self):
        """Called by the child-class virtual Close() functions, does the
        shared parts of the cleanup.

        Returns:
            True if the reader was not in state kError, or if it was but
            permissive mode is on; False otherwise.
        """
        if not self.IsOpen():
            LogError('Called twice or otherwise wrongly.')
        if self.input.IsOpen():
            self.input.Close()
        if self.state == RandomAccessTableReaderStateType.kHaveObject:
            self.holder.Clear()
        ans = (self.state != RandomAccessTableReaderStateType.kError)
        self.state = RandomAccessTableReaderStateType.kUninitialized
        if not ans and self.opts.permissive:
            LogWarning('Error state detected closing reader. Ignoring it '
                       'because you specified permissive mode.')
            # Actually ignore the error, as the warning promises.
            return True
        return ans
class SequentialTableReaderScriptImpl(object):
    """Sequential reader over the entries listed in a script file.

    Each script line has two whitespace-separated fields, a key and the
    rxfilename of the data file holding that key's object. Objects are
    loaded lazily by EnsureObjectLoaded().
    """

    def __init__(self, holder_type):
        """Initialize the reader for the given holder type.

        Args:
            holder_type: The given holder type.
        """
        self.rspecifier = None
        self.opts = None
        self.script_rxfilename = None
        self.script_input = Input()
        self.data_input = Input()
        self.type = holder_type
        self.holder = NewHolderByType(self.type)
        self.range_holder = NewHolderByType(self.type)
        self.key = None
        self.data_rxfilename = None
        self.range = None
        self.state = SequentialTableReaderStateType.kUninitialized

    def Open(self, rspecifier):
        """Open a reader for the given rspecifier.

        Args:
            rspecifier: The given rspecifier.

        Returns:
            A boolean variable indicating if the operation is successful.
        """
        # You may call Open from states kUninitialized and kError.
        # It may leave the object in any of the states.
        if self.state != SequentialTableReaderStateType.kUninitialized and \
                self.state != SequentialTableReaderStateType.kError:
            # call Close() yourself to suppress this exception.
            if not self.Close():
                LogError(
                    'Error closing previous input, rspecifier was \"%s\"'
                    % self.rspecifier)
        self.rspecifier = rspecifier
        (rspecifier_type, rxfilename, opts) = ClassifyRspecifier(rspecifier)
        self.script_rxfilename = rxfilename
        self.opts = opts
        if rspecifier_type != RspecifierType.kScriptRspecifier:
            LogError('Invalid rspecifier type \"%s\"' % rspecifier_type)
        self.script_input = Input()
        if not self.script_input.Open(self.script_rxfilename):
            LogError('Failed opening script file \"%s\"'
                     % self.script_rxfilename)
        if self.script_input.IsBinary():
            self.SetErrorState()
            LogError('script file should not be in binary format.')
        else:
            self.state = SequentialTableReaderStateType.kFileStart
            self.Next()
        if self.state == SequentialTableReaderStateType.kError:
            return False
        # any other status, including kEof, is OK from the point of view
        # of the 'open' function (empty scp file is not inherently an
        # error).
        return True

    def IsOpen(self):
        """Tell whether the reader is attached to a script.

        Returns:
            True in states kEof, kHaveScpLine, kHaveObject and kHaveRange,
            False in kUninitialized and kError. Any other state is reported
            through LogError.
        """
        if self.state == SequentialTableReaderStateType.kEof or \
                self.state == SequentialTableReaderStateType.kHaveScpLine or \
                self.state == SequentialTableReaderStateType.kHaveObject or \
                self.state == SequentialTableReaderStateType.kHaveRange:
            return True
        elif self.state == SequentialTableReaderStateType.kUninitialized or \
                self.state == SequentialTableReaderStateType.kError:
            return False
        else:
            # note: kFileStart is not a valid state for the user to call a
            # member function (we never return from a public function in
            # this state).
            LogError('Invalid state \"%s\"' % self.state)

    def Done(self):
        """Tell whether there is no current entry left to consume.

        Returns:
            False while a script line or object is current, True in kEof or
            kError. Any other state is reported through LogError.
        """
        if self.state == SequentialTableReaderStateType.kHaveScpLine or \
                self.state == SequentialTableReaderStateType.kHaveObject or \
                self.state == SequentialTableReaderStateType.kHaveRange:
            return False
        elif self.state == SequentialTableReaderStateType.kEof or \
                self.state == SequentialTableReaderStateType.kError:
            # Error condition, like Eof, counts as Done(); the
            # destructor/Close() will inform the user of the error.
            return True
        else:
            LogError('Invalid state \"%s\"' % self.state)

    def Key(self):
        """Return the key of the current script line."""
        if self.state != SequentialTableReaderStateType.kHaveScpLine and \
                self.state != SequentialTableReaderStateType.kHaveObject and \
                self.state != SequentialTableReaderStateType.kHaveRange:
            LogError('Invalid state \"%s\"' % self.state)
        return self.key

    def Value(self):
        """Load (if necessary) and return the object for the current key."""
        if not self.EnsureObjectLoaded():
            LogError('Failed to load object from \"%s\" to suppress this '
                     'error, add the permissive (p, ) option to the '
                     'rspecifier.' % self.data_rxfilename)
        if self.state == SequentialTableReaderStateType.kHaveRange:
            return self.range_holder.Value()
        elif self.state == SequentialTableReaderStateType.kHaveObject:
            return self.holder.Value()
        else:
            LogError('Invalid state \"%s\"' % self.state)

    def Next(self):
        """Advance to the next script line.

        In permissive mode, lines whose object cannot be loaded are skipped.
        """
        while True:
            self.NextScpLine()
            if self.Done():
                return
            if self.opts.permissive:
                # Permissive mode means, when reading scp files, we treat keys
                # whose scp entry cannot be read as nonexistent. This means
                # trying to read.
                if self.EnsureObjectLoaded():
                    return  # Success.
                # else try the next scp line.
            else:
                # We go the next key; Value() will crash if we can't read the
                # object on the scp line.
                return

    def Close(self):
        """Close both streams and put the reader back to kUninitialized.

        Returns:
            False if the reader was in kError, or hit kEof and closing the
            script stream returned a non-zero status, unless permissive mode
            is on; True otherwise.
        """
        status = 0
        if self.script_input.IsOpen():
            status = self.script_input.Close()
        if self.data_input.IsOpen():
            self.data_input.Close()
        self.range_holder.Clear()
        self.holder.Clear()
        if not self.IsOpen():
            LogError('Called on input that was not open.')
        old_state = self.state
        # Without this reset a later Open() would try to close again.
        self.state = SequentialTableReaderStateType.kUninitialized
        if old_state == SequentialTableReaderStateType.kError or \
                (old_state == SequentialTableReaderStateType.kEof and
                 status != 0):
            if self.opts.permissive:
                LogWarning('Error state detected closing reader. Ignoring '
                           'it because you specified permissive mode.')
                return True
            return False
        return True

    def SetErrorState(self):
        """Switch to kError, closing both streams and clearing both holders.

        Returns:
            True.
        """
        self.state = SequentialTableReaderStateType.kError
        self.script_input.Close()
        self.data_input.Close()
        self.holder.Clear()
        self.range_holder.Clear()
        return True

    def NextScpLine(self):
        """Read the next script line and update key, data file and state.

        A loaded object is kept only if the new line names the same data
        file as the previous one. When the script is exhausted the state
        becomes kEof and both streams are closed.
        """
        if self.state == SequentialTableReaderStateType.kHaveRange:
            self.range_holder.Clear()
            self.state = SequentialTableReaderStateType.kHaveObject
        if self.state != SequentialTableReaderStateType.kHaveScpLine and \
                self.state != SequentialTableReaderStateType.kHaveObject and \
                self.state != SequentialTableReaderStateType.kFileStart:
            LogError('Invalid state \"%s\"' % self.state)
        line = self.script_input.Stream().Readline()
        if line:
            token = line.rstrip().split()
            if len(token) != 2:
                LogError('Invalid line \"%s\"' % line)
            self.key = token[0]
            data_rxfilename = None
            if token[1].endswith(']'):
                LogError('Range specifier support not implemented yet.')
            else:
                data_rxfilename = token[1]
                self.range = None
            filenames_equal = (self.data_rxfilename == data_rxfilename)
            if not filenames_equal:
                self.data_rxfilename = data_rxfilename
            if self.state == SequentialTableReaderStateType.kHaveObject:
                if not filenames_equal:
                    self.holder.Clear()
                    self.state = SequentialTableReaderStateType.kHaveScpLine
                # else: same data file, so the loaded object stays valid.
            else:
                self.state = SequentialTableReaderStateType.kHaveScpLine
        else:
            self.state = SequentialTableReaderStateType.kEof
            # There is nothing more in the scp file. Might as well close input
            # streams as we don't need them.
            self.script_input.Close()
            if self.data_input.IsOpen():
                self.data_input.Close()
            self.holder.Clear()  # clear the holder if it was nonempty.
            # clear the range holder if it was nonempty.
            self.range_holder.Clear()

    def EnsureObjectLoaded(self):
        """Ensures that we have fully loaded any object associated with the
        current key.

        A failure to open or read the data file is logged as a warning and
        signalled through the return value, so that Next() can skip the
        entry in permissive mode and Value() can report the error otherwise.

        Returns:
            A boolean variable indicating if the operation is successful.
        """
        if self.state != SequentialTableReaderStateType.kHaveScpLine and \
                self.state != SequentialTableReaderStateType.kHaveObject and \
                self.state != SequentialTableReaderStateType.kHaveRange:
            LogError('Invalid state \"%s\"' % self.state)
        if self.state == SequentialTableReaderStateType.kHaveScpLine:
            if not self.data_input.Open(self.data_rxfilename):
                LogWarning('Failed to open file \"%s\"'
                           % self.data_rxfilename)
                return False
            if self.holder.Read(self.data_input.Stream(),
                                self.data_input.IsBinary()):
                self.state = SequentialTableReaderStateType.kHaveObject
            else:
                LogWarning('Failed to load object from \"%s\"'
                           % self.data_rxfilename)
                return False
        # At this point the state must be either kHaveObject or kHaveRange.
        if self.range:
            LogError('Range specifier support not implemented yet.')
        return True
def __init__(self, rxfilename=None):
    """Create an object with an empty component list, optionally loading it.

    Args:
        rxfilename: If not None, it is opened with Input and the contents
            are loaded through self.Read().
    """
    self.components = []
    if rxfilename is not None:
        istream = Input(rxfilename)
        try:
            self.Read(istream.Stream(), istream.IsBinary())
        finally:
            # Release the stream even when Read() raises.
            istream.Close()