def _decode_raw(self, data):
    """
    @see AudioInput._decode_raw()

    Feed ``data`` to the decoder between ``start_utt()`` and ``end_utt()``
    and convert the resulting segments into tokens.

    :param data: The raw audio bytes to hand to the decoder.

    :return: A list of ``Token`` instances, one per decoded segment, with
             the ``<s>`` and ``</s>`` sentence markers left out.
    """
    # Decode the raw bytes
    self._decoder.start_utt()
    self._decoder.process_raw(data, False, True)
    self._decoder.end_utt()

    tokens = []
    for seg in self._decoder.seg():
        word = seg.word
        prob = seg.prob

        # Start and end markers. These have to be compared by value; an
        # identity test against a string literal only matches when the
        # interpreter happens to hand back the very same object.
        if word in ('<s>', '</s>'):
            continue

        # Anything carrying angle or square brackets is a non-verbal token
        vrbl = not any(ch in word for ch in '<>[]')

        # Strip any "(...)" appendage which details the path
        if '(' in word:
            word = word[:word.index('(')]

        # Save as a token in the result
        tokens.append(Token(word, prob, vrbl))

    # We're done!
    return tokens
def _handle(self, sckt):
    """
    Handle reading from a socket.

    Bytes are read one at a time and accumulated into words, which are
    delimited by spaces, tabs and newlines. Each word becomes a ``Token``
    and, on every newline, the tokens gathered so far are appended to
    ``self._output`` as one list.

    The method returns when the peer closes the connection, or when an
    EOT byte (0x04) arrives at the start of a word, in which case the
    socket is also closed.

    :param sckt: The connected socket to read from.
    """
    LOG.info("Started new socket handler")

    # We'll build these up
    tokens = []
    cur = b''

    # Loop until they go away
    while True:
        c = sckt.recv(1)
        if c is None or len(c) == 0:
            LOG.info("Peer closed connection")
            return

        if len(cur) == 0 and ord(c) == 4:
            LOG.info("Got EOT")
            try:
                sckt.close()
            except Exception as e:
                # Best effort only; we are leaving anyway
                LOG.debug("Error closing socket: %s", e)
            return

        if c in b' \t\n':
            # Strip before testing for emptiness so that stray whitespace
            # bytes which are not delimiters (e.g. the '\r' of a "\r\n"
            # line ending) do not turn into an empty token.
            word = cur.strip()
            cur = b''
            if len(word) > 0:
                try:
                    text = word.decode()
                except UnicodeDecodeError as e:
                    # Drop the bad word rather than killing the handler
                    LOG.error("Error handling '%s': %s", word, e)
                else:
                    tokens.append(Token(text, 1.0, True))

            if c == b'\n':
                self._output.append(tokens)
                tokens = []

        else:
            cur += c
def _decode(self):
    """
    @see AudioInput._decode()

    Fold the recognizer's final result into the pending results, reset the
    recognizer, and convert the pending results into tokens. Entries with
    an empty word or a falsy confidence are dropped. The pending results
    are cleared before returning.

    :return: The list of ``Token`` instances built from the results.
    """
    # Pull in whatever the recognizer still holds, then wipe it for next time
    self._add_result(self._recognizer.FinalResult())
    self._recognizer.Reset()

    LOG.debug("Decoding: %s" % self._results)

    # Lazily pair up each entry's cleaned word with its confidence
    pairs = (
        (entry.get('word', '').strip(), entry.get('conf', 0.0))
        for entry in self._results
    )
    tokens = [
        Token(text, confidence, True)
        for (text, confidence) in pairs
        if text and confidence
    ]

    # Start afresh next time around
    self._results = []

    LOG.debug("Got: %s" % ' '.join(str(i) for i in tokens))
    return tokens
def tokenize(string):
    """
    Split ``string`` on whitespace into a list of tokens.

    :param string: The value to tokenize; it is passed through ``str()``
                   before splitting.

    :return: One ``Token`` per word, or an empty list if ``string`` is
             falsy or contains only whitespace.
    """
    # Nothing given means nothing to do
    if not string:
        return []

    # Likewise if it was only whitespace
    text = str(string).strip()
    if not text:
        return []

    return [Token(piece.strip(), 1.0, True) for piece in text.split() if piece]
def _decode(self):
    """
    @see AudioInput._decode()

    Tell the remote end that the data is finished, read back its reply and
    split that reply into tokens.

    The request is a single signed 64-bit integer, -1, in network byte
    order. The reply is a signed 64-bit length in network byte order
    followed by that many bytes of encoded text.

    The socket is always shut down, closed and forgotten before returning,
    whether or not the exchange succeeded.

    :return: A list of ``Token`` instances, one per non-blank word of the
             reply; an empty list if there was no socket or anything failed.
    """
    if self._sckt is None:
        # No context means no tokens
        LOG.warning("Had no stream context to close")
        return []

    sckt = self._sckt

    def read_exactly(size):
        # recv() may hand back fewer bytes than asked for, so keep going
        # until we have them all; an empty read means the peer went away.
        buf = b''
        while len(buf) < size:
            got = sckt.recv(size - len(buf))
            if len(got) == 0:
                raise IOError("EOF in recv()")
            buf += got
        return buf

    try:
        # Send the EOD token
        sckt.sendall(struct.pack('!q', -1))

        # Get back the result:
        #   8 bytes for the length
        #   data...
        LOG.info("Waiting for result...")
        (count, ) = struct.unpack("!q", read_exactly(8))

        # Read in the string
        LOG.info("Reading %d chars" % (count, ))
        result = read_exactly(count).decode()
        LOG.info("Result is: '%s'" % (result, ))

        # Convert to tokens
        return [
            Token(word.strip(), 1.0, True)
            for word in result.split(' ')
            if word.strip() != ''
        ]

    except Exception as e:
        # Again, just grumble on exceptions
        LOG.info("Failed to do remote processing: %s" % e)
        return []

    finally:
        # Forget the socket first so it can never be reused, then close it
        # out, best effort. shutdown() and close() are attempted separately
        # so that a failing shutdown() (e.g. the peer has already gone)
        # cannot stop the descriptor from being released.
        self._sckt = None
        LOG.info("Closing connection")
        try:
            sckt.shutdown(socket.SHUT_RDWR)
        except Exception as e:
            LOG.debug("Error shutting down socket: %s", e)
        try:
            sckt.close()
        except Exception as e:
            LOG.debug("Error closing socket: %s", e)
def _decode_raw(self, data):
    """
    @see AudioInput._decode_raw()

    View ``data`` as 16-bit integers, pass it to the model's ``stt()``
    along with ``self._rate``, and split the returned text into tokens.

    :param data: The raw audio bytes.

    :return: A list of ``Token`` instances, one per non-blank word.
    """
    samples = numpy.frombuffer(data, numpy.int16)
    words = self._model.stt(samples, self._rate)
    LOG.info("Got: %s" % (words, ))

    # Break the text up on spaces, ignoring any blank pieces
    tokens = []
    for piece in words.split(' '):
        text = piece.strip()
        if len(text) > 0:
            tokens.append(Token(text, 1.0, True))
    return tokens
def _handle(self, sckt):
    """
    Handle reading from a socket.

    Bytes are read one at a time and accumulated into words, which are
    delimited by spaces, tabs and newlines. Each non-blank word becomes a
    ``Token``. On every newline, if any tokens were gathered, they are
    appended to ``self._output`` as one list, preceded by ``self._prefix``
    when that is set.

    The method returns when the peer closes the connection, or when an
    EOT byte (0x04) arrives at the start of a word, in which case the
    socket is also closed.

    :param sckt: The connected socket to read from.
    """
    LOG.info("Started new socket handler")

    # We'll build these up
    tokens = []
    cur = b''

    # Loop until they go away
    while True:
        c = sckt.recv(1)
        if c is None or len(c) == 0:
            LOG.info("Peer closed connection")
            return

        if len(cur) == 0 and ord(c) == 4:
            LOG.info("Got EOT")
            try:
                sckt.close()
            except Exception as e:
                # Best effort only, but don't swallow things like
                # KeyboardInterrupt the way a bare except would
                LOG.debug("Error closing socket: %s", e)
            return

        if c in b' \t\n':
            # A delimiter always ends the current word, blank or not
            word = cur.strip()
            cur = b''
            if len(word) > 0:
                try:
                    tokens.append(Token(word.decode(), 1.0, True))
                except Exception as e:
                    # Drop the bad word rather than killing the handler
                    LOG.error("Error handling '%s': %s", word, e)

            if c == b'\n':
                # End of line: hand over what we have, if anything
                if len(tokens) > 0:
                    if self._prefix:
                        tokens = self._prefix + tokens
                    self._output.append(tokens)
                    tokens = []

        else:
            cur += c
def _decode(self):
    """
    @see AudioInput._decode()

    Finish the current stream context, if there is one, and split the text
    it yields into tokens. The context is dropped afterwards.

    :return: A list of ``Token`` instances, one per non-blank word; an
             empty list if there was no context.
    """
    # Without a context there is nothing to decode
    if self._context is None:
        LOG.warning("Had no stream context to close")
        return []

    # Finishing the stream gives us the decoded text
    words = self._context.finishStream()
    LOG.info("Got: %s" % (words, ))
    self._context = None

    # Tokenize, skipping over any blank pieces
    pieces = (piece.strip() for piece in words.split(' '))
    return [Token(text, 1.0, True) for text in pieces if len(text) > 0]
def __init__(self, state, port=8008, prefix=None):
    """
    @see Input.__init__()

    :type  port: int
    :param port:
        The port to listen on.
    :type  prefix: str
    :param prefix:
        What to prefix to the beginning of any input. It is split on
        whitespace into tokens; a falsy or blank value means no prefix.
    """
    super(SocketInput, self).__init__(state)

    self._port = int(port)

    # Turn the prefix, if any, into its tokens up front
    prefix_tokens = None
    if prefix:
        text = str(prefix).strip()
        if text:
            prefix_tokens = [
                Token(piece.strip(), 1.0, True)
                for piece in text.split()
                if piece
            ]
    self._prefix = prefix_tokens

    self._socket = None
    self._output = []
def tokenise(string):
    """
    Turn a string into a list of tokens.

    The string is split on single spaces. Empty pieces, which an explicit
    separator yields for an empty string or for leading, trailing or
    repeated spaces, are skipped so that no token has an empty word.

    :param string: The text to split.

    :return: A list of ``Token`` instances, one per non-empty piece.
    """
    from dexter.input import Token
    return [Token(word, 1.0, True) for word in string.split(' ') if word]
def _decode_raw(self, data):
    """
    @see AudioInput._decode_raw()

    Send ``data`` to the remote host for processing and split its reply
    into tokens.

    The request is a header of four signed 64-bit integers in network
    byte order (``self._channels``, ``self._width``, ``self._rate`` and
    the length of ``data``) followed by ``data`` itself. The reply is a
    signed 64-bit length in network byte order followed by that many
    bytes of encoded text.

    :param data: The raw audio bytes to send.

    :return: A list of ``Token`` instances, one per non-blank word of the
             reply; an empty list if ``data`` is empty or anything failed.
    """
    # Handle funny inputs
    if data is None or len(data) == 0:
        return []

    # Info in the header
    header = struct.pack('!qqqq',
                         self._channels, self._width, self._rate, len(data))

    # Connect
    LOG.info("Opening connection to %s:%d" % (self._host, self._port,))

    def read_exactly(conn, size):
        # recv() may hand back fewer bytes than asked for, so keep going
        # until we have them all. An empty read means the peer has closed
        # the connection; without this check the loop would spin forever.
        buf = b''
        while len(buf) < size:
            got = conn.recv(size - len(buf))
            if len(got) == 0:
                raise IOError("EOF in recv()")
            buf += got
        return buf

    # Bound up front so that the cleanup below is well-defined even if
    # creating the socket fails
    sckt = None
    try:
        # Connect
        sckt = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sckt.connect((self._host, self._port))

        # Send off our query
        LOG.info("Sending %d bytes of data to %s" % (len(data), self._host))
        sckt.sendall(header)
        sckt.sendall(data)

        # Get back the result:
        #   8 bytes for the length
        #   data...
        LOG.info("Waiting for result...")
        (count, ) = struct.unpack("!q", read_exactly(sckt, 8))

        # Read in the string
        LOG.info("Reading %d chars" % (count, ))
        result = read_exactly(sckt, count).decode()
        LOG.info("Result is: '%s'" % (result, ))

    except Exception as e:
        # Don't kill the thread by throwing an exception, just grumble
        LOG.info("Failed to do remote processing: %s" % e)
        return []

    finally:
        # Close it out, best effort. shutdown() and close() are attempted
        # separately so that a failing shutdown() (e.g. we never got
        # connected) cannot stop the descriptor from being released.
        LOG.info("Closing connection")
        if sckt is not None:
            try:
                sckt.shutdown(socket.SHUT_RDWR)
            except Exception as e:
                LOG.debug("Error shutting down socket: %s", e)
            try:
                sckt.close()
            except Exception as e:
                LOG.debug("Error closing socket: %s", e)

    # Convert to tokens
    tokens = [
        Token(word.strip(), 1.0, True)
        for word in result.split(' ')
        if word.strip() != ''
    ]
    return tokens