def main():
    """
    Open the tweets database and register the custom SQL functions on it.

    Returns a ``(connection, cursor)`` tuple for ``data/sge_tweets.db``.
    """
    logger.info("Starting")
    connection = sqlite3.connect("data/sge_tweets.db")

    # Each Python helper is exposed to SQL as a one-argument function.
    sql_functions = (
        ("tokenise", tokenise),
        ("suffixes", tokenise_and_extract_suffixes),
    )
    for sql_name, func in sql_functions:
        connection.create_function(sql_name, 1, func)

    return connection, connection.cursor()
Пример #2
0
    def _receive_to_queue(self):
        """
        Coroutine that reads messages from the websocket, parses each one as
        JSON and puts the parsed value on the input queue.

        It stops when the websocket yields None or when a message cannot be
        parsed as JSON.
        """
        try:
            while True:
                raw = yield from self.websocket.recv()
                if raw is None:
                    logger.info("[{}] Client connection closed.".format(
                        self.websocket.remote_ip))
                    return

                # Anything that is not valid JSON ends the receiver.
                try:
                    payload = json.loads(raw)
                except ValueError:
                    logger.error(
                        "[{}] Bad input from client. "
                        "(Could not parse JSON)".format(
                            self.websocket.remote_ip))
                    return

                yield from self.input_queue.put(payload)
                logger.info("[{}] [RECV] {}".format(
                    self.websocket.remote_ip, payload))
        except CancelledError:
            logger.debug(
                "[{}] CancelledError on receiver -- "
                "Should not be happening.".format(self.websocket.remote_ip))
Пример #3
0
    def fetch_search_results(self, query, offset=0, limit=0):
        """
        Run a search for `query` and package the outcome for the client.

        On success the returned dict carries 'total', 'results', 'query'
        (the processed query handed back to the client), 'elapsed' (seconds
        since this call started, formatted to three decimals) and 'offset'.
        If the lookup raises an SQLAlchemy or oce custom error, the error is
        logged and ``{'total': 0, 'results': 'error'}`` is returned instead.
        """
        started = timeit.default_timer()

        # Pre-process the query; `return_query` is what goes back to the
        # client, including canonical forms of special commands.
        processed = self._process_query(query)
        return_query, fts_query, suffixes_query, suffixes_full_terms = processed
        search_terms = (fts_query, suffixes_query, suffixes_full_terms)

        logger.info("Searching -- FTS:'{}', Suffixes:'{}'.".format(
            fts_query, suffixes_query))

        try:
            # Both lookups go through the memoisation caches; arguments are
            # passed positionally, exactly as before.
            count = self._cached_search_results_count(*search_terms)
            results = self._cached_search_results(
                *(search_terms + (offset, limit)))
        except (sqlalchemy.exc.SQLAlchemyError,
                oce.exceptions.CustomError) as e:
            # Whoops.
            logger.error(e)
            return {'total': 0, 'results': 'error'}
        else:
            elapsed = "{:.3f}".format(timeit.default_timer() - started)
            return {
                'total': count,
                'results': results,
                'query': return_query,
                'elapsed': elapsed,
                'offset': offset
            }
Пример #4
0
    def __init__(self, model=default_model, trained_file=default_trained_file):
        """
        Remember the requested model and trained-file path, then try to load
        a previously trained classifier from that file.

        A warning is logged when no classifier could be loaded, when the
        loaded classifier has no `model_name` attribute, or when its
        `model_name` differs from `model`.
        """
        self.model = model
        self.trained_file = trained_file

        # === Classifier ===
        self.classifier = self.load_classifier(trained_file)

        # Shared tail of the two "model mismatch" warnings.
        overwrite_hint = ("Use .train_classifier() followed by "
                          ".save_classifier() to overwrite it.")

        if self.classifier is None:
            logger.warning("No previously trained classifier found. (" +
                           trained_file + ")")
            logger.warning("Use .train_classifier() and .save_classifier() to "
                           "train and save a new classifier respectively.")
        elif not hasattr(self.classifier, 'model_name'):
            logger.warning(
                "The loaded classifier does not specify which model it "
                "uses; it could be different from the one expected.  " +
                overwrite_hint)
        elif self.classifier.model_name != model:
            logger.warning("The model used by the loaded classifier (" +
                           self.classifier.model_name +
                           ") is different from the one requested (" +
                           model +
                           ").  " +
                           overwrite_hint)

        logger.info("Language ID module initialised.")
Пример #5
0
def main():
    """
    Connect to ``data/sge_tweets.db`` and make the tokenising helpers
    callable from SQL.

    Returns the connection together with a fresh cursor on it.
    """
    logger.info("Starting")
    db = sqlite3.connect("data/sge_tweets.db")

    # Register the helpers as one-argument SQL functions.
    for sql_name, helper in (("tokenise", tokenise),
                             ("suffixes", tokenise_and_extract_suffixes)):
        db.create_function(sql_name, 1, helper)

    cursor = db.cursor()
    return db, cursor
Пример #6
0
        def execute(msg):
            """
            Write `msg` to the client, wait for the writer to drain, then log
            a shortened preview with newlines and carriage returns escaped.
            """
            # Only messages longer than the preview window get an ellipsis.
            ellipsis = "..." if len(msg) > preview_length else ""
            head = msg[:preview_length]
            msg_preview = head.replace("\n", "\\n").replace("\r", "\\r") + ellipsis

            self.writer.write(msg.encode())
            yield from self.writer.drain()
            logger.info("[{}] [SEND] {}".format(self.remote_ip, msg_preview))
Пример #7
0
 def load_classifier(self, trained_file):
     """
     Load a pickled classifier from `trained_file`.

     Returns the unpickled object, or None if the file does not exist.
     Any other error (e.g. a corrupt pickle) propagates to the caller.
     """
     try:
         # The context manager closes the file even when unpickling fails;
         # the previous open()/close() pair leaked the handle in that case.
         with open(trained_file, 'rb') as f:
             # NOTE(review): pickle.load can execute arbitrary code, so
             # trained_file must only ever point at a trusted file.
             classifier = pickle.load(f)
     except FileNotFoundError:
         return None

     logger.info("Loaded previously trained classifier. (" +
                 trained_file + ")")
     return classifier
Пример #8
0
        def execute(msg):
            """
            Send `msg` through the writer and log an escaped, length-limited
            preview of it once the writer has drained.
            """
            too_long = len(msg) > preview_length

            # Escape line breaks so that the preview stays on one log line.
            msg_preview = msg[0:preview_length]
            msg_preview = msg_preview.replace('\n', '\\n')
            msg_preview = msg_preview.replace('\r', '\\r')
            if too_long:
                msg_preview = msg_preview + "..."

            self.writer.write(msg.encode())
            yield from self.writer.drain()
            logger.info("[{}] [SEND] {}".format(self.remote_ip, msg_preview))
Пример #9
0
    def save_classifier(self, trained_file=None):
        """
        Pickle the current classifier to disk.

        `trained_file` is the destination path; when it is None,
        self.trained_file is used. If self.get_classifier() returns None,
        a warning is logged and nothing is written.
        """
        classifier = self.get_classifier()
        if classifier is None:
            logger.warning("Could not save classifier. (None currently "
                           "initialised.)")
            return

        if trained_file is None:
            trained_file = self.trained_file

        # The context manager closes the file even if pickling fails; the
        # previous open()/close() pair leaked the handle in that case.
        with open(trained_file, 'wb') as f:
            # NOTE(review): this dumps self.classifier rather than the object
            # returned by get_classifier() above -- confirm they are the same.
            pickle.dump(self.classifier, f)
        logger.info("Saved trained classifier to '" + trained_file + "'.")
Пример #10
0
    def _receive_to_queue(self):
        """
        Coroutine that reads lines from the client and puts each one on the
        input queue until an empty bytes object (EOF) is read.
        """
        try:
            msg = yield from self.reader.readline()

            # "If the EOF was received and the internal buffer is empty,
            # return an empty bytes object."
            while msg != b"":
                logger.info("[{}] [RECV] {}".format(self.remote_ip, msg))
                yield from self.input_queue.put(msg)
                msg = yield from self.reader.readline()

            logger.info("[{}] Client connection closed.".format(self.remote_ip))

        except CancelledError:
            logger.debug("[{}] Cancelling receiver...".format(self.remote_ip))
Пример #11
0
    def _receive_to_queue(self):
        """
        Coroutine that forwards every line read from the client to the input
        queue, and returns once readline() yields an empty bytes object.
        """
        try:
            while True:
                line = yield from self.reader.readline()

                if line != b'':
                    logger.info("[{}] [RECV] {}".format(self.remote_ip, line))
                    yield from self.input_queue.put(line)
                    continue

                # "If the EOF was received and the internal buffer is empty,
                # return an empty bytes object."
                logger.info("[{}] Client connection closed.".format(
                    self.remote_ip))
                return

        except CancelledError:
            logger.debug("[{}] Cancelling receiver...".format(self.remote_ip))
Пример #12
0
    def __init__(self, port, register_client, deregister_client):
        """
        Starts up a telnet server on the given port.

        `register_client` and `deregister_client` are passed to the parent
        constructor and also stored on the instance.
        """
        super().__init__(port, register_client, deregister_client)
        self.port = port
        self.register_client = register_client
        self.deregister_client = deregister_client

        # This only creates the start-up coroutine; the server object itself
        # is obtained below, when the event loop runs it to completion.
        self.server = asyncio.start_server(self._new_client_handler, host=None, port=port)

        # Small hack to find a usable local IP address: connecting a UDP
        # socket sends no packets but makes the OS pick a local address.
        # The `with` block closes the probe socket, which used to be leaked.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(("8.8.8.8", 1))  # Google public DNS server
            self.local_ip = s.getsockname()[0]

        self.handler_list = []
        self.client_list = []

        logger.info("Telnet server starting on {}, port {}.".format(self.local_ip, self.port))
        self.server = asyncio.get_event_loop().run_until_complete(self.server)
Пример #13
0
    def __init__(self, **kwargs):
        """
        Build the controller from keyword arguments naming exactly one
        provider class and at least one interface class.

        Entries whose value is None are ignored. Raises
        oce.exceptions.CustomError if a key is neither a known provider nor
        a known interface, if more than one provider is given, or if no
        provider or no interface ends up configured.
        """
        logger.info("=== Controller Initialisation ===")

        # Data provider and server interfaces, filled in below
        self.provider = None
        self.servers = []

        # Clients registered with us by the servers
        self.clients = []
        # This future gets resolved and recreated whenever the client list
        # changes.
        self.clients_changed = asyncio.Future()
        # Watched clients, as a list of tuples: (ClientInterface, Future)
        self.client_watch = []

        for key, value in kwargs.items():
            if value is None:
                # Not specified by the user, and its default settings have
                # been disabled.
                continue

            if key in provider_classes:
                if self.provider is not None:
                    raise oce.exceptions.CustomError("More than one data provider specified.")
                self.provider = provider_classes[key](value)
            elif key in interface_classes:
                # Interfaces also receive our client de-/registration
                # functions.
                interface_class = interface_classes[key]
                self.servers.append(
                    interface_class(value, self.register_client, self.deregister_client))
            else:
                raise oce.exceptions.CustomError("Invalid provider/interface: '{}'".format(key))

        # Make sure they're up
        if self.provider is None:
            raise oce.exceptions.CustomError("No data provider specified.")
        if not self.servers:
            raise oce.exceptions.CustomError("No interfaces specified.")

        # Lang ID module
        self.langid = oce.langid.LangIDController()
Пример #14
0
    def _send_from_queue(self):
        """
        Coroutine that takes messages off the output queue and sends each one
        over the websocket as base64-encoded, zlib-compressed JSON.

        It stops if the websocket is found to be closed before a send.
        """
        try:
            while True:
                payload = yield from self.output_queue.get()
                serialised = json.dumps(payload)
                # The log shows only the first 80 characters of the JSON.
                msg_preview = serialised[:80]
                compressed = zlib.compress(serialised.encode())
                encoded = base64.b64encode(compressed).decode()

                if not self.websocket.open:
                    logger.error(
                        "[{}] Send error: Socket closed unexpectedly.".format(
                            self.websocket.remote_ip))
                    return

                yield from self.websocket.send(encoded)
                logger.info("[{}] [SEND] {}...".format(
                    self.websocket.remote_ip, msg_preview))
        except CancelledError:
            logger.debug("[{}] Cancelling sender...".format(
                self.websocket.remote_ip))
Пример #15
0
    def __init__(self, port, register_client, deregister_client):
        """
        Starts up a websocket server on the given port.

        `register_client` and `deregister_client` are passed to the parent
        constructor and also stored on the instance.
        """
        super().__init__(port, register_client, deregister_client)
        self.port = port
        self.register_client = register_client
        self.deregister_client = deregister_client

        # We're going to jury-rig a new class that inherits from
        # websockets.server.WebSocketServerProtocol so that we can save the
        # remote IP address of incoming connections.
        class CustomWebSocketServerProtocol(WebSocketServerProtocol):
            def __init__(self, *args, **kwargs):
                self.remote_ip = ""
                super().__init__(*args, **kwargs)

            def connection_made(self, transport):
                # The remote IP address will be available as websocket.remote_ip
                self.remote_ip = transport.get_extra_info('peername')[0]
                super().connection_made(transport)

        # This just sets the server options; the server itself is only
        # available after the self.server is actually run via the event loop
        self.server = websockets.serve(self._new_client_handler,
                                       host=None,
                                       port=port,
                                       klass=CustomWebSocketServerProtocol)

        # Small hack to try to get a usable local IP address
        # (Connecting to a UDP address doesn't send packets)
        # The `with` block closes the probe socket, which used to be leaked.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(('8.8.8.8', 1))  # Google public DNS server
            self.local_ip = s.getsockname()[0]

        self.client_list = []
        self.handler_list = []

        logger.info(
            "Websocket server starting on {}, port {}.".format(self.local_ip,
                                                               self.port)
        )
        self.server = asyncio.get_event_loop().run_until_complete(self.server)
Пример #16
0
    def __init__(self, port, register_client, deregister_client):
        """
        Starts up a telnet server on the given port.

        `register_client` and `deregister_client` are passed to the parent
        constructor and also stored on the instance.
        """
        super().__init__(port, register_client, deregister_client)
        self.port = port
        self.register_client = register_client
        self.deregister_client = deregister_client

        # This only creates the start-up coroutine; the server object itself
        # is obtained below, when the event loop runs it to completion.
        self.server = asyncio.start_server(self._new_client_handler,
                                           host=None,
                                           port=port)

        # Small hack to find a usable local IP address: connecting a UDP
        # socket sends no packets but makes the OS pick a local address.
        # The `with` block closes the probe socket, which used to be leaked.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(('8.8.8.8', 1))  # Google public DNS server
            self.local_ip = s.getsockname()[0]

        self.handler_list = []
        self.client_list = []

        logger.info("Telnet server starting on {}, port {}.".format(
            self.local_ip, self.port))
        self.server = asyncio.get_event_loop().run_until_complete(self.server)
Пример #17
0
    def __init__(self, port, register_client, deregister_client):
        """
        Starts up a websocket server on the given port.

        `register_client` and `deregister_client` are passed to the parent
        constructor and also stored on the instance.
        """
        super().__init__(port, register_client, deregister_client)
        self.port = port
        self.register_client = register_client
        self.deregister_client = deregister_client

        # We're going to jury-rig a new class that inherits from
        # websockets.server.WebSocketServerProtocol so that we can save the
        # remote IP address of incoming connections.
        class CustomWebSocketServerProtocol(WebSocketServerProtocol):
            def __init__(self, *args, **kwargs):
                self.remote_ip = ""
                super().__init__(*args, **kwargs)

            def connection_made(self, transport):
                # The remote IP address will be available as websocket.remote_ip
                self.remote_ip = transport.get_extra_info('peername')[0]
                super().connection_made(transport)

        # This just sets the server options; the server itself is only
        # available after the self.server is actually run via the event loop
        self.server = websockets.serve(self._new_client_handler,
                                       host=None,
                                       port=port,
                                       klass=CustomWebSocketServerProtocol)

        # Small hack to try to get a usable local IP address
        # (Connecting to a UDP address doesn't send packets)
        # The `with` block closes the probe socket, which used to be leaked.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(('8.8.8.8', 1))  # Google public DNS server
            self.local_ip = s.getsockname()[0]

        self.client_list = []
        self.handler_list = []

        logger.info("Websocket server starting on {}, port {}.".format(
            self.local_ip, self.port))
        self.server = asyncio.get_event_loop().run_until_complete(self.server)
Пример #18
0
    def update_record(self, row_id, field, value):
        """
        Set `field` to `value` on every record whose rowid equals `row_id`.

        Returns 'success' when the update went through (this is also the
        result when no row matches), 'invalid_field' when the record has no
        attribute named `field`, and 'error' when SQLAlchemy raises.
        """
        try:
            for row in self.session.query(Records) \
                    .filter(Records.rowid == row_id):

                if not hasattr(row, field):
                    logger.warning(
                        "Invalid field name given by client: '{}'".format(
                            field))
                    return 'invalid_field'

                # ====================
                # Field-specific hooks
                # ====================
                # Process record tags to update tweets_tags table
                if field == 'tag':
                    old_tags = fts_detag('tag', row.tag)
                    self._update_tags(value, old_tags)
                elif field == 'language':
                    value = langid_normalise_language(value)
                # ====================

                # Add FTS tags if needed
                value = fts_tag(field, value)
                logger.info("Updating '{0}' on row {1}: {2} -> {3}".format(
                    field, row.rowid,
                    str(getattr(row, field)).replace('\n', '\\n'),
                    str(value).replace('\n', '\\n')))
                setattr(row, field, value)

                self.session.commit()

            # Also clear all memoisation caches, in case the update
            # invalidates their results
            self._clear_caches()

            return 'success'
        except sqlalchemy.exc.SQLAlchemyError as e:
            # Uh oh.
            logger.error(e)
            # After a failed flush/commit the session refuses further work
            # until its transaction is rolled back, so do that here rather
            # than leaving the shared session broken for later calls.
            self.session.rollback()
            return 'error'
Пример #19
0
    def _send_from_queue(self):
        """
        Coroutine that drains the output queue: each message is dumped to
        JSON, zlib-compressed, base64-encoded and sent over the websocket.

        It returns early if the websocket is no longer open.
        """
        try:
            while True:
                outgoing = yield from self.output_queue.get()
                as_json = json.dumps(outgoing)
                # Keep a short, readable excerpt for the log.
                msg_preview = as_json[:80]
                wire_msg = base64.b64encode(
                    zlib.compress(as_json.encode())
                ).decode()

                if not self.websocket.open:
                    logger.error(
                        "[{}] Send error: Socket closed unexpectedly.".format(
                            self.websocket.remote_ip))
                    return

                yield from self.websocket.send(wire_msg)
                logger.info("[{}] [SEND] {}...".format(
                    self.websocket.remote_ip, msg_preview))
        except CancelledError:
            logger.debug("[{}] Cancelling sender...".format(
                self.websocket.remote_ip))
Пример #20
0
    def communicate_until_closed(self):
        """
        Coroutine that runs the receiver and sender for this client until
        one of them finishes, then cancels whichever is still running.

        If a finished task ended with an exception, it is re-raised once
        cleanup is complete.
        """
        logger.info("[{}] New client.".format(self.websocket.remote_ip))

        # Bring up the communication coroutines and wait for them.
        # They all run infinite loops, so if any one of them completes, it
        # means the client is no longer active.
        # (asyncio.ensure_future replaces asyncio.async, which is a syntax
        # error from Python 3.7 on because `async` became a keyword.)
        communication_tasks = [
            asyncio.ensure_future(self._receive_to_queue()),
            asyncio.ensure_future(self._send_from_queue())
        ]
        done, pending = yield from asyncio.wait(communication_tasks,
                                                return_when=FIRST_COMPLETED)

        logger.info("[{}] Cleaning up client...".format(
            self.websocket.remote_ip))

        # Remember the first exception but keep going: raising right away
        # would skip the cancellation below and leave the other task running.
        got_exception = None
        for task in done:
            e = task.exception()
            if got_exception is None and isinstance(e, Exception):
                got_exception = e

        # Cancel any hangers-on (viz., _send_from_queue())
        for task in pending:
            task.cancel()
        # asyncio.wait() raises ValueError on an empty set, which happens
        # when both tasks finished at the same time.
        if pending:
            yield from asyncio.wait(pending)

        logger.info("[{}] Cleanup complete.".format(self.websocket.remote_ip))

        if got_exception is not None:
            # If any of our tasks threw an exception, re-raise it instead of
            # failing silently.
            raise got_exception
Пример #21
0
    def communicate_until_closed(self):
        """
        Coroutine that runs the receiver and sender for this client until
        one of them finishes, then cancels whichever is still running.

        If a finished task ended with an exception, it is re-raised once
        cleanup is complete.
        """
        logger.info("[{}] New client.".format(self.websocket.remote_ip))

        # Bring up the communication coroutines and wait for them.
        # They all run infinite loops, so if any one of them completes, it
        # means the client is no longer active.
        # (asyncio.ensure_future replaces asyncio.async, which is a syntax
        # error from Python 3.7 on because `async` became a keyword.)
        communication_tasks = [asyncio.ensure_future(self._receive_to_queue()),
                               asyncio.ensure_future(self._send_from_queue())]
        done, pending = yield from asyncio.wait(communication_tasks,
                                                return_when=FIRST_COMPLETED)

        logger.info(
            "[{}] Cleaning up client...".format(self.websocket.remote_ip)
        )

        # Remember the first exception but keep going: raising right away
        # would skip the cancellation below and leave the other task running.
        got_exception = None
        for task in done:
            e = task.exception()
            if got_exception is None and isinstance(e, Exception):
                got_exception = e

        # Cancel any hangers-on (viz., _send_from_queue())
        for task in pending:
            task.cancel()
        # asyncio.wait() raises ValueError on an empty set, which happens
        # when both tasks finished at the same time.
        if pending:
            yield from asyncio.wait(pending)

        logger.info("[{}] Cleanup complete.".format(self.websocket.remote_ip))

        if got_exception is not None:
            # If any of our tasks threw an exception, re-raise it instead of
            # failing silently.
            raise got_exception
Пример #22
0
    def shutdown(self):
        """
        Coroutine that tears the controller down in order: client watchers,
        server interfaces, the data provider and finally the langid module.
        """
        logger.info("Shutting down client watchers...")
        for watch_entry in self.client_watch:
            # The client watchers are coroutines; the watcher sits at index 1
            # of each entry.
            watcher = watch_entry[1]
            watcher.cancel()
            yield from watcher

        logger.info("Shutting down interfaces...")
        for interface in self.servers:
            # The connection manager uses coroutines
            yield from interface.shutdown()
        self.servers = []

        logger.info("Shutting down data provider...")
        provider = self.provider
        provider.shutdown()
        self.provider = None

        logger.info("Shutting down langid module...")
        langid = self.langid
        langid.shutdown()
        self.langid = None
Пример #23
0
    def shutdown(self):
        """
        A coroutine that gracefully destroys the server: it closes every
        connected client, waits for the client handlers to finish, then
        closes the listening server.
        """
        logger.info("Shutting down Websocket server...")
        if self.client_list:
            logger.info("Kicking connected Websocket clients...")
            for connected in self.client_list:
                yield from connected.close()

        # The handlers in handler_list only complete once their clients are
        # completely closed and deregistered.
        if self.handler_list:
            yield from asyncio.wait(self.handler_list)

        listener = self.server
        listener.close()
        yield from listener.wait_closed()
        logger.info("Websocket server shutdown complete.")
Пример #24
0
    def shutdown(self):
        """
        A coroutine that gracefully destroys the server.

        Connected clients are closed first, then the outstanding handlers
        are awaited, and finally the server itself is closed.
        """
        logger.info("Shutting down Websocket server...")

        have_clients = bool(self.client_list)
        if have_clients:
            logger.info("Kicking connected Websocket clients...")
            for ws_client in self.client_list:
                yield from ws_client.close()

        # The handlers in handler_list only complete once their clients are
        # completely closed and deregistered.
        have_handlers = bool(self.handler_list)
        if have_handlers:
            yield from asyncio.wait(self.handler_list)

        self.server.close()
        yield from self.server.wait_closed()
        logger.info("Websocket server shutdown complete.")
Пример #25
0
    def communicate_until_closed(self):
        """
        Coroutine that runs the receiver, parser and sender for this telnet
        client until one of them, or the kill switch, completes; it then
        cancels the rest in order and closes the client.

        An exception from a finished task is re-raised after cleanup, unless
        it is a TelnetExit, which is ignored.
        """
        logger.info("[{}] New telnet client.".format(self.remote_ip))

        # asyncio.ensure_future replaces asyncio.async, which is a syntax
        # error from Python 3.7 on because `async` became a keyword.
        communication_tasks = [
            asyncio.ensure_future(self._receive_to_queue()),
            asyncio.ensure_future(self.parser.run_parser()),
            asyncio.ensure_future(self._send_from_queue()), self.kill_switch
        ]
        done, pending = yield from asyncio.wait(communication_tasks,
                                                return_when=FIRST_COMPLETED)

        logger.info("[{}] Cleaning up client...".format(self.remote_ip))

        got_exception = None
        for task in done:
            e = task.exception()
            if isinstance(e, TelnetExit):
                # No need to handle this here; the client will be closed anyway.
                pass
            elif isinstance(e, Exception):
                # If any of our tasks threw a different exception, re-raise it
                # instead of failing silently.
                got_exception = e

        # Make sure we cancel the tasks in order, so that last minute
        # messages can still get sent.
        for task in communication_tasks:
            if not task.done():
                task.cancel()
                # self.kill_switch is a simple Future; it doesn't need to
                # clean up.
                if task != self.kill_switch:
                    yield from task

        yield from self._close()

        logger.info("[{}] Cleanup complete.".format(self.remote_ip))

        if got_exception is not None:
            # Report through the logger, like the rest of the file, instead
            # of a stray print to stdout.
            logger.error(got_exception)
            raise got_exception
Пример #26
0
    def communicate_until_closed(self):
        """
        Coroutine that runs the receiver, parser and sender for this telnet
        client until one of them, or the kill switch, completes; it then
        cancels the rest in order and closes the client.

        An exception from a finished task is re-raised after cleanup, unless
        it is a TelnetExit, which is ignored.
        """
        logger.info("[{}] New telnet client.".format(self.remote_ip))

        # asyncio.ensure_future replaces asyncio.async, which is a syntax
        # error from Python 3.7 on because `async` became a keyword.
        communication_tasks = [
            asyncio.ensure_future(self._receive_to_queue()),
            asyncio.ensure_future(self.parser.run_parser()),
            asyncio.ensure_future(self._send_from_queue()),
            self.kill_switch,
        ]
        done, pending = yield from asyncio.wait(communication_tasks, return_when=FIRST_COMPLETED)

        logger.info("[{}] Cleaning up client...".format(self.remote_ip))

        got_exception = None
        for task in done:
            e = task.exception()
            if isinstance(e, TelnetExit):
                # No need to handle this here; the client will be closed anyway.
                pass
            elif isinstance(e, Exception):
                # If any of our tasks threw a different exception, re-raise it
                # instead of failing silently.
                got_exception = e

        # Make sure we cancel the tasks in order, so that last minute
        # messages can still get sent.
        for task in communication_tasks:
            if not task.done():
                task.cancel()
                # self.kill_switch is a simple Future; it doesn't need to
                # clean up.
                if task != self.kill_switch:
                    yield from task

        yield from self._close()

        logger.info("[{}] Cleanup complete.".format(self.remote_ip))

        if got_exception is not None:
            # Report through the logger, like the rest of the file, instead
            # of a stray print to stdout.
            logger.error(got_exception)
            raise got_exception
Пример #27
0
 def shutdown(self):
     """
     Commit whatever is pending on the session, close it, and log that the
     provider has shut down.
     """
     session = self.session
     session.commit()
     session.close()
     logger.info("SQLite data provider shut down.")