def main():
    """Open the tweet database and register the tokenisation UDFs.

    Returns a (connection, cursor) pair for the caller to use.
    """
    logger.info("Starting")
    db = sqlite3.connect("data/sge_tweets.db")
    # Expose the Python tokenisers as SQL functions (1 argument each).
    db.create_function("tokenise", 1, tokenise)
    db.create_function("suffixes", 1, tokenise_and_extract_suffixes)
    cursor = db.cursor()
    return db, cursor
def _receive_to_queue(self):
    """Coroutine: read messages from the websocket, decode them as JSON and
    push them onto ``self.input_queue`` until the connection closes.

    Exits the loop (ending the coroutine) on either a closed connection or
    unparseable input from the client.
    """
    try:
        while True:
            msg = yield from self.websocket.recv()
            if msg is None:
                # recv() yields None once the peer has closed the connection.
                logger.info(
                    "[{}] Client connection closed.".format(
                        self.websocket.remote_ip)
                )
                break

            # Attempt to parse JSON
            try:
                msg = json.loads(msg)
            except ValueError:
                # Malformed input: log it and drop the client's loop rather
                # than trying to recover.
                logger.error(
                    "[{}] Bad input from client. "
                    "(Could not parse JSON)".format(
                        self.websocket.remote_ip)
                )
                break

            yield from self.input_queue.put(msg)
            logger.info("[{}] [RECV] {}".format(
                self.websocket.remote_ip, msg)
            )
    except CancelledError:
        # The receiver is expected to end via break above, not cancellation.
        logger.debug(
            "[{}] CancelledError on receiver -- "
            "Should not be happening.".format(self.websocket.remote_ip)
        )
def fetch_search_results(self, query, offset=0, limit=0):
    """Run a search against the corpus and package the results for the client.

    Returns a dict with 'total', 'results', 'query', 'elapsed' and 'offset'
    on success, or {'total': 0, 'results': 'error'} on a database/search error.
    """
    start_time = timeit.default_timer()

    # Pre-process the query.
    # We will return the query to the client, including canonical forms of
    # special commands.
    (return_query, fts_query,
     suffixes_query, suffixes_full_terms) = self._process_query(query)

    logger.info("Searching -- FTS:'{}', Suffixes:'{}'.".format(
        fts_query, suffixes_query))

    try:
        # Get the results for the query, hitting the memoisation caches
        # if we can.
        count = self._cached_search_results_count(fts_query,
                                                  suffixes_query,
                                                  suffixes_full_terms)
        results = self._cached_search_results(fts_query,
                                              suffixes_query,
                                              suffixes_full_terms,
                                              offset,
                                              limit)
    except (sqlalchemy.exc.SQLAlchemyError,
            oce.exceptions.CustomError) as e:
        # Whoops.
        logger.error(e)
        return {'total': 0, 'results': 'error'}

    elapsed = "{:.3f}".format(timeit.default_timer() - start_time)
    return {
        'total': count,
        'results': results,
        'query': return_query,
        'elapsed': elapsed,
        'offset': offset
    }
def __init__(self, model=default_model, trained_file=default_trained_file):
    """Initialise the language ID controller.

    Loads a previously pickled classifier from *trained_file* if one exists,
    and warns when the loaded classifier's model does not match *model*.
    """
    self.model = model
    self.trained_file = trained_file

    # === Classifier ===
    self.classifier = self.load_classifier(trained_file)

    if self.classifier is None:
        # Nothing on disk yet -- the user will have to train one.
        logger.warning("No previously trained classifier found. (" +
                       trained_file + ")")
        logger.warning("Use .train_classifier() and .save_classifier() to "
                       "train and save a new classifier respectively.")
    elif not hasattr(self.classifier, 'model_name'):
        logger.warning(
            "The loaded classifier does not specify which model it "
            "uses; it could be different from the one expected. "
            "Use .train_classifier() followed by .save_classifier() "
            "to overwrite it.")
    elif self.classifier.model_name != model:
        logger.warning("The model used by the loaded classifier (" +
                       self.classifier.model_name +
                       ") is different from the one requested (" +
                       model + "). "
                       "Use .train_classifier() followed by .save_classifier() "
                       "to overwrite it.")

    logger.info("Language ID module initialised.")
def execute(msg):
    """Write *msg* to the client and log a truncated single-line preview."""
    # Escape newlines so the preview stays on one log line.
    preview = msg[0:preview_length].replace("\n", "\\n").replace("\r", "\\r")
    if len(msg) > preview_length:
        preview = preview + "..."
    self.writer.write(msg.encode())
    yield from self.writer.drain()
    logger.info("[{}] [SEND] {}".format(self.remote_ip, preview))
def load_classifier(self, trained_file):
    """Load a pickled classifier from *trained_file*.

    Returns the unpickled classifier object, or None if the file does not
    exist.
    """
    try:
        # Use a context manager so the file handle is closed even if
        # pickle.load() raises (the previous open/load/close leaked it).
        with open(trained_file, 'rb') as f:
            # NOTE: pickle.load() can execute arbitrary code from the file;
            # only load classifier files from trusted sources.
            classifier = pickle.load(f)
        logger.info("Loaded previously trained classifier. (" +
                    trained_file + ")")
        return classifier
    except FileNotFoundError:
        return None
def execute(msg):
    """Send *msg* to the remote end, logging an escaped, truncated preview."""
    preview = msg[0:preview_length]
    # Keep the log entry on a single line.
    preview = preview.replace('\n', '\\n')
    preview = preview.replace('\r', '\\r')
    if len(msg) > preview_length:
        preview += "..."
    self.writer.write(msg.encode())
    yield from self.writer.drain()
    logger.info("[{}] [SEND] {}".format(self.remote_ip, preview))
def save_classifier(self, trained_file=None):
    """Pickle the current classifier to *trained_file*.

    Defaults to the path this controller was initialised with. Logs a
    warning and does nothing if no classifier is currently initialised.
    """
    classifier = self.get_classifier()
    if classifier is None:
        logger.warning("Could not save classifier. (None currently "
                       "initialised.)")
        return
    if trained_file is None:
        trained_file = self.trained_file
    # Context manager guarantees the handle is closed even if
    # pickle.dump() raises (the previous open/dump/close leaked it).
    with open(trained_file, 'wb') as f:
        pickle.dump(self.classifier, f)
    logger.info("Saved trained classifier to '" + trained_file + "'.")
    return
def _receive_to_queue(self):
    """Coroutine: forward each line from the telnet reader to the input
    queue until EOF, or until we are cancelled during shutdown."""
    try:
        while True:
            msg = yield from self.reader.readline()
            # "If the EOF was received and the internal buffer is empty,
            # return an empty bytes object."
            if not msg:
                logger.info(
                    "[{}] Client connection closed.".format(self.remote_ip))
                break
            logger.info("[{}] [RECV] {}".format(self.remote_ip, msg))
            yield from self.input_queue.put(msg)
    except CancelledError:
        logger.debug("[{}] Cancelling receiver...".format(self.remote_ip))
def _receive_to_queue(self):
    """Coroutine: push each line received from the telnet client onto
    ``self.input_queue`` until the stream hits EOF or we are cancelled.
    """
    try:
        while True:
            msg = yield from self.reader.readline()
            # "If the EOF was received and the internal buffer is empty,
            # return an empty bytes object."
            if msg == b'':
                logger.info("[{}] Client connection closed.".format(
                    self.remote_ip))
                break
            logger.info("[{}] [RECV] {}".format(self.remote_ip, msg))
            yield from self.input_queue.put(msg)
    except CancelledError:
        # Normal path during server shutdown.
        logger.debug("[{}] Cancelling receiver...".format(self.remote_ip))
def __init__(self, port, register_client, deregister_client):
    """Start a telnet server on *port*.

    *register_client*/*deregister_client* are callbacks used to announce
    clients to the controller as they connect and disconnect.
    """
    super().__init__(port, register_client, deregister_client)
    self.port = port
    self.register_client = register_client
    self.deregister_client = deregister_client

    self.server = asyncio.start_server(self._new_client_handler,
                                       host=None,
                                       port=port)

    # Small hack to get a usable local IP address: "connecting" a UDP
    # socket sends no packets but makes the OS pick a routable source.
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        s.connect(("8.8.8.8", 1))  # Google public DNS server
        self.local_ip = s.getsockname()[0]
    finally:
        # Fix: the probe socket was never closed, leaking its fd.
        s.close()

    self.handler_list = []
    self.client_list = []

    logger.info("Telnet server starting on {}, port {}.".format(self.local_ip,
                                                                self.port))
    self.server = asyncio.get_event_loop().run_until_complete(self.server)
def __init__(self, **kwargs):
    """
    **kwargs should specify exactly one provider class and at least one
    interface class.

    Raises oce.exceptions.CustomError if zero/multiple providers, no
    interfaces, or an unrecognised key is given.
    """
    logger.info("=== Controller Initialisation ===")

    # Bring up data providers and server interfaces
    self.provider = None
    self.servers = []

    # Client list - Servers will register their clients with us as they come
    self.clients = []
    # And when they do, this future will get resolved and recreated.
    self.clients_changed = asyncio.Future()

    # So that we can watch them for input
    # This is a list of tuples: (ClientInterface, Future)
    self.client_watch = []

    for key, value in kwargs.items():
        if value is None:
            # The user did not specify this provider/interface as a server
            # parameter, and its default settings have been disabled.
            continue

        # Idiom: membership tests directly on the dicts, not on .keys().
        if key in provider_classes:
            if self.provider is None:
                self.provider = provider_classes[key](value)
            else:
                raise oce.exceptions.CustomError(
                    "More than one data provider specified.")
        elif key in interface_classes:
            # Interfaces also need to be passed our client de-/registration
            # functions
            server = interface_classes[key](value,
                                            self.register_client,
                                            self.deregister_client)
            self.servers.append(server)
        else:
            raise oce.exceptions.CustomError(
                "Invalid provider/interface: '{}'".format(key))

    # Make sure they're up
    if self.provider is None:
        raise oce.exceptions.CustomError("No data provider specified.")
    if not self.servers:
        raise oce.exceptions.CustomError("No interfaces specified.")

    # Lang ID module
    self.langid = oce.langid.LangIDController()
def _send_from_queue(self):
    """Coroutine: serialise, compress and transmit queued messages to the
    websocket client until the socket closes or we are cancelled."""
    try:
        while True:
            payload = yield from self.output_queue.get()
            payload = json.dumps(payload)
            preview = payload[0:80]
            # Compress, then base64-encode so the frame is plain text.
            payload = base64.b64encode(zlib.compress(payload.encode())).decode()
            if not self.websocket.open:
                logger.error(
                    "[{}] Send error: Socket closed unexpectedly.".format(
                        self.websocket.remote_ip))
                break
            yield from self.websocket.send(payload)
            logger.info("[{}] [SEND] {}...".format(
                self.websocket.remote_ip, preview))
    except CancelledError:
        logger.debug("[{}] Cancelling sender...".format(
            self.websocket.remote_ip))
def __init__(self, port, register_client, deregister_client):
    """
    Starts up a websocket server on the given port
    """
    super().__init__(port, register_client, deregister_client)
    self.port = port
    self.register_client = register_client
    self.deregister_client = deregister_client

    # We're going to jury-rig a new class that inherits from
    # websockets.server.WebSocketServerProtocol so that we can save the
    # remote IP address of incoming connections.
    class CustomWebSocketServerProtocol(WebSocketServerProtocol):
        def __init__(self, *args, **kwargs):
            self.remote_ip = ""
            super().__init__(*args, **kwargs)

        def connection_made(self, transport):
            # The remote IP address will be available as websocket.remote_ip
            self.remote_ip = transport.get_extra_info('peername')[0]
            super().connection_made(transport)

    # This just sets the server options; the server itself is only
    # available after the self.server is actually run via the event loop
    self.server = websockets.serve(self._new_client_handler,
                                   host=None,
                                   port=port,
                                   klass=CustomWebSocketServerProtocol)

    # Small hack to try to get a usable local IP address
    # (Connecting to a UDP address doesn't send packets)
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        s.connect(('8.8.8.8', 1))  # Google public DNS server
        self.local_ip = s.getsockname()[0]
    finally:
        # Fix: the probe socket was never closed, leaking its fd.
        s.close()

    self.client_list = []
    self.handler_list = []

    logger.info(
        "Websocket server starting on {}, port {}.".format(self.local_ip,
                                                           self.port)
    )
    self.server = asyncio.get_event_loop().run_until_complete(self.server)
def __init__(self, port, register_client, deregister_client):
    """Start a telnet server on *port*, registering clients with the
    controller via the given callbacks."""
    super().__init__(port, register_client, deregister_client)
    self.port = port
    self.register_client = register_client
    self.deregister_client = deregister_client

    self.server = asyncio.start_server(self._new_client_handler,
                                       host=None,
                                       port=port)

    # "Connecting" a UDP socket sends no packets but lets us read back a
    # routable local address from the OS.
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        s.connect(('8.8.8.8', 1))  # Google public DNS server
        self.local_ip = s.getsockname()[0]
    finally:
        # Fix: the probe socket was never closed, leaking its fd.
        s.close()

    self.handler_list = []
    self.client_list = []

    logger.info("Telnet server starting on {}, port {}.".format(
        self.local_ip, self.port))
    self.server = asyncio.get_event_loop().run_until_complete(self.server)
def __init__(self, port, register_client, deregister_client):
    """
    Starts up a websocket server on the given port
    """
    super().__init__(port, register_client, deregister_client)
    self.port = port
    self.register_client = register_client
    self.deregister_client = deregister_client

    # We're going to jury-rig a new class that inherits from
    # websockets.server.WebSocketServerProtocol so that we can save the
    # remote IP address of incoming connections.
    class CustomWebSocketServerProtocol(WebSocketServerProtocol):
        def __init__(self, *args, **kwargs):
            self.remote_ip = ""
            super().__init__(*args, **kwargs)

        def connection_made(self, transport):
            # The remote IP address will be available as websocket.remote_ip
            self.remote_ip = transport.get_extra_info('peername')[0]
            super().connection_made(transport)

    # This just sets the server options; the server itself is only
    # available after the self.server is actually run via the event loop
    self.server = websockets.serve(self._new_client_handler,
                                   host=None,
                                   port=port,
                                   klass=CustomWebSocketServerProtocol)

    # Small hack to try to get a usable local IP address
    # (Connecting to a UDP address doesn't send packets)
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        s.connect(('8.8.8.8', 1))  # Google public DNS server
        self.local_ip = s.getsockname()[0]
    finally:
        # Fix: the probe socket was never closed, leaking its fd.
        s.close()

    self.client_list = []
    self.handler_list = []

    logger.info("Websocket server starting on {}, port {}.".format(
        self.local_ip, self.port))
    self.server = asyncio.get_event_loop().run_until_complete(self.server)
def update_record(self, row_id, field, value):
    """Update a single field on the record with rowid *row_id*.

    Returns 'success' after committing, 'invalid_field' if the record has
    no attribute named *field*, or 'error' on a database failure.
    """
    try:
        for row in self.session.query(Records) \
                .filter(Records.rowid == row_id):
            if not hasattr(row, field):
                logger.warning(
                    "Invalid field name given by client: '{}'".format(
                        field))
                return 'invalid_field'

            # ====================
            # Field-specific hooks
            # ====================
            # Process record tags to update tweets_tags table
            if field == 'tag':
                old_tags = fts_detag('tag', row.tag)
                self._update_tags(value, old_tags)
            elif field == 'language':
                value = langid_normalise_language(value)
            # ====================

            # Add FTS tags if needed
            value = fts_tag(field, value)

            # Newlines are escaped so the log entry stays on one line.
            logger.info("Updating '{0}' on row {1}: {2} -> {3}".format(
                field, row.rowid,
                str(getattr(row, field)).replace('\n', '\\n'),
                str(value).replace('\n', '\\n')))
            setattr(row, field, value)
            self.session.commit()

            # Also clear all memoisation caches, in case the update
            # invalidates their results
            self._clear_caches()
            return 'success'
            # NOTE(review): commit/return sit inside the row loop, so an
            # unknown row_id falls through and implicitly returns None
            # rather than 'success' -- confirm callers expect this.
    except sqlalchemy.exc.SQLAlchemyError as e:
        # Uh oh.
        logger.error(e)
        return 'error'
def _send_from_queue(self):
    """Coroutine: take messages off ``self.output_queue``, serialise them to
    JSON, compress + base64-encode them, and send them to the websocket
    client until the socket closes or the task is cancelled.
    """
    try:
        while True:
            msg = yield from self.output_queue.get()
            msg = json.dumps(msg)
            # Keep a short plaintext preview for logging before the
            # payload is compressed into unreadable base64.
            msg_preview = msg[0:80]
            msg = base64.b64encode(zlib.compress(msg.encode())).decode()
            if not self.websocket.open:
                logger.error(
                    "[{}] Send error: Socket closed unexpectedly.".format(
                        self.websocket.remote_ip))
                break
            yield from self.websocket.send(msg)
            logger.info("[{}] [SEND] {}...".format(
                self.websocket.remote_ip, msg_preview)
            )
    except CancelledError:
        # Normal path: the receiver finishing triggers our cancellation.
        logger.debug("[{}] Cancelling sender...".format(
            self.websocket.remote_ip))
def communicate_until_closed(self):
    """Coroutine: run the client's receive/send loops until either ends,
    then cancel the survivor and clean up.

    Re-raises any exception raised by a communication task instead of
    swallowing it.
    """
    logger.info("[{}] New client.".format(self.websocket.remote_ip))

    # Bring up the communication coroutines and wait for them.
    # They all run infinite loops, so if any one of them completes, it
    # means the client is no longer active.
    # Fix: asyncio.async() was renamed to ensure_future() in 3.4.4; the
    # old spelling is a SyntaxError from Python 3.7 ('async' is a keyword).
    communication_tasks = [
        asyncio.ensure_future(self._receive_to_queue()),
        asyncio.ensure_future(self._send_from_queue())
    ]
    done, pending = yield from asyncio.wait(communication_tasks,
                                            return_when=FIRST_COMPLETED)

    logger.info("[{}] Cleaning up client...".format(
        self.websocket.remote_ip))
    for task in done:
        e = task.exception()
        if isinstance(e, Exception):
            # If any of our tasks threw an exception, re-raise it instead of
            # failing silently.
            raise e

    # Cancel any hangers-on (viz., _send_from_queue())
    for task in pending:
        task.cancel()
    yield from asyncio.wait(pending)

    logger.info("[{}] Cleanup complete.".format(self.websocket.remote_ip))
def communicate_until_closed(self):
    """Coroutine: service one websocket client until its receive or send
    loop finishes, then cancel the other and clean up, re-raising any
    exception a task produced."""
    logger.info("[{}] New client.".format(self.websocket.remote_ip))

    # Bring up the communication coroutines and wait for them.
    # They all run infinite loops, so if any one of them completes, it
    # means the client is no longer active.
    # Fix: asyncio.async() is a SyntaxError on Python 3.7+ ('async' became
    # a keyword); ensure_future() is the supported name since 3.4.4.
    communication_tasks = [asyncio.ensure_future(self._receive_to_queue()),
                           asyncio.ensure_future(self._send_from_queue())]
    done, pending = yield from asyncio.wait(communication_tasks,
                                            return_when=FIRST_COMPLETED)

    logger.info(
        "[{}] Cleaning up client...".format(self.websocket.remote_ip)
    )
    for task in done:
        e = task.exception()
        if isinstance(e, Exception):
            # If any of our tasks threw an exception, re-raise it instead of
            # failing silently.
            raise e

    # Cancel any hangers-on (viz., _send_from_queue())
    for task in pending:
        task.cancel()
    yield from asyncio.wait(pending)

    logger.info("[{}] Cleanup complete.".format(self.websocket.remote_ip))
def shutdown(self):
    """Coroutine: tear down watchers, interfaces, the data provider and
    the langid module, in that order."""
    logger.info("Shutting down client watchers...")
    # The client watchers are coroutines
    for _interface, watcher in self.client_watch:
        watcher.cancel()
        yield from watcher

    logger.info("Shutting down interfaces...")
    # The connection manager uses coroutines
    for server in self.servers:
        yield from server.shutdown()
    self.servers = []

    logger.info("Shutting down data provider...")
    self.provider.shutdown()
    self.provider = None

    logger.info("Shutting down langid module...")
    self.langid.shutdown()
    self.langid = None
def shutdown(self):
    """
    A coroutine that gracefully destroys the server
    """
    logger.info("Shutting down Websocket server...")

    if self.client_list:
        logger.info("Kicking connected Websocket clients...")
        for client in self.client_list:
            yield from client.close()

    # The handlers in handler_list only complete once their clients are
    # completely closed and deregistered.
    if self.handler_list:
        yield from asyncio.wait(self.handler_list)

    self.server.close()
    yield from self.server.wait_closed()
    logger.info("Websocket server shutdown complete.")
def communicate_until_closed(self):
    """Coroutine: run the telnet client's receive/parse/send loops until one
    of them (or the kill switch) completes, then cancel the rest in order
    and close the connection.

    TelnetExit from a task is treated as a normal disconnect; any other
    exception is re-raised after cleanup.
    """
    logger.info("[{}] New telnet client.".format(self.remote_ip))
    # Fix: asyncio.async() was renamed to ensure_future() in 3.4.4; the
    # old spelling is a SyntaxError from Python 3.7 ('async' is a keyword).
    communication_tasks = [
        asyncio.ensure_future(self._receive_to_queue()),
        asyncio.ensure_future(self.parser.run_parser()),
        asyncio.ensure_future(self._send_from_queue()),
        self.kill_switch
    ]
    done, pending = yield from asyncio.wait(communication_tasks,
                                            return_when=FIRST_COMPLETED)

    logger.info("[{}] Cleaning up client...".format(self.remote_ip))
    got_exception = None
    for task in done:
        e = task.exception()
        if isinstance(e, TelnetExit):
            # No need to handle this here; the client will be closed anyway.
            pass
        elif isinstance(e, Exception):
            # If any of our tasks threw a different exception, re-raise it
            # instead of failing silently.
            got_exception = e

    # Make sure we cancel the tasks in order, so that last minute
    # messages can still get sent.
    for task in communication_tasks:
        if not task.done():
            task.cancel()
            # self.kill_switch is a simple Future; it doesn't need to
            # clean up.
            if task != self.kill_switch:
                yield from task

    yield from self._close()
    logger.info("[{}] Cleanup complete.".format(self.remote_ip))

    if got_exception is not None:
        print(got_exception)
        raise got_exception
def communicate_until_closed(self):
    """Coroutine: service one telnet client until any communication task or
    the kill switch completes, then cancel the remaining tasks in order,
    close the connection, and re-raise any unexpected task exception
    (TelnetExit counts as a normal disconnect)."""
    logger.info("[{}] New telnet client.".format(self.remote_ip))
    # Fix: asyncio.async() is a SyntaxError on Python 3.7+ ('async' became
    # a keyword); ensure_future() is the supported name since 3.4.4.
    communication_tasks = [
        asyncio.ensure_future(self._receive_to_queue()),
        asyncio.ensure_future(self.parser.run_parser()),
        asyncio.ensure_future(self._send_from_queue()),
        self.kill_switch,
    ]
    done, pending = yield from asyncio.wait(communication_tasks,
                                            return_when=FIRST_COMPLETED)

    logger.info("[{}] Cleaning up client...".format(self.remote_ip))
    got_exception = None
    for task in done:
        e = task.exception()
        if isinstance(e, TelnetExit):
            # No need to handle this here; the client will be closed anyway.
            pass
        elif isinstance(e, Exception):
            # If any of our tasks threw a different exception, re-raise it
            # instead of failing silently.
            got_exception = e

    # Make sure we cancel the tasks in order, so that last minute
    # messages can still get sent.
    for task in communication_tasks:
        if not task.done():
            task.cancel()
            # self.kill_switch is a simple Future; it doesn't need to
            # clean up.
            if task != self.kill_switch:
                yield from task

    yield from self._close()
    logger.info("[{}] Cleanup complete.".format(self.remote_ip))

    if got_exception is not None:
        print(got_exception)
        raise got_exception
def shutdown(self):
    """Flush any pending changes and close the database session."""
    session = self.session
    session.commit()
    session.close()
    logger.info("SQLite data provider shut down.")