def debug(client, msg: str):
    """
    Handles a "debug" command.

    Logs a confirmation when the argument is 'received'; any other argument
    is reported as unexpected output.

    :param client: The client the command was received on (not used here).
    :param msg: The argument that came with the "debug" command.
    """
    if msg != 'received':
        warning('Received weird output: command "debug", args:', msg)
        return

    info(
        'The server has received the request and has started working on it...'
    )
def init_paths():
    """
    Checks the project root, the server root and the template file (through
    test_directory() and test_file()) and logs the outcome of each check.

    Returns immediately when PATHS_READY is already set. A missing project
    root or server root is only reported with error(); a missing template
    file is reported and then aborts the initialization.

    :exception Exception: if the template file is not found.
    """
    global PATHS_READY, TEMPLATE

    if PATHS_READY:
        verbose("Paths are already ready; exiting init_paths function")
        return

    info("\nSearching for the tools...")
    verbose("Working directory is [", os.getcwd(), "]")
    verbose("Project root is [", PROJECT_ROOT, "]")
    verbose("Server project root is [", SERVER_ROOT, "]")

    # Both directories are checked the same way; neither is fatal.
    directory_checks = (
        (PROJECT_ROOT,
         " › Found the project root",
         " › Cannot find the project root!"),
        (SERVER_ROOT,
         " › Found the server root",
         " › Cannot find the server root!"),
    )
    for directory, found_message, missing_message in directory_checks:
        if test_directory(directory):
            verbose(found_message)
        else:
            error(missing_message)

    # The template is mandatory: give up if it is missing.
    if not test_file(TEMPLATE):
        error(" › Template file not found")
        raise Exception("Couldn't find the template file.")
    verbose(" › Found the template file")

    verbose("Done initializing paths.\n")
    PATHS_READY = True
def __lambda_to_python(all_lambda: List[List[str]]) -> List[Expression]:
    """
    Parses λ-expressions into Python objects, using lexpr.

    The per-sentence nesting of the input is flattened: the result is a
    single list that keeps the order in which the expressions were given.

    :param all_lambda: For each sentence, the list of its λ-expressions.
    :return: One parsed Expression per λ-expression, in a flat list.
    """
    info(" › nltk: λ 🠦 Python objects")

    expressions = []
    for sentence in all_lambda:
        for drs in sentence:
            expressions.append(lexpr(drs))
    return expressions
def kill(self):
    """
    Shuts this server down: closes its socket, marks it as not running, then
    closes every client stored in self.clients.
    """
    self.socket.close()
    self.is_running = False

    for client in self.clients:
        client.close()

    info("Server: Disconnected.")
def __ccg_jigg_xml_to_lambda(ccg_trees_jigg_xml: Any) -> List[List[str]]:
    """
    Runs the ccg2lambda semantic parser on CCG trees, with the TEMPLATE file.

    :param ccg_trees_jigg_xml: The CCG trees, in Jigg XML format.
    :return: The second element of what parse.parse() returns; per the return
        annotation, the list of semantic representations of each sentence.
    """
    info(" › ccg2lambda: CCG in Jigg XML 🠦 λ")
    parse_result = parse.parse(ccg_trees_jigg_xml, TEMPLATE)
    return parse_result[1]
def __convert_to_ccg(sentences: List[List[str]]) -> List[List[Tuple[Tree]]]:
    """
    Hands the given sentences to the depccg parser.

    :param sentences: A list of sentences, each one being a list of words.
    :return: Whatever depccg_parser.parse_doc() produces; per the return
        annotation, the CCG trees of each sentence.
    """
    info(" › depccg: English 🠦 CCG")
    ccg_trees = depccg_parser.parse_doc(sentences)
    return ccg_trees
def __annotate_spacy(
        sentences: List[str]) -> Tuple[List[List[Token]], List[List[str]]]:
    """
    Annotates English sentences, using spaCy.

    Each sentence is first split on single spaces, then the resulting word
    lists are passed to annotate_using_spacy() with tokenize=True.

    :param sentences: A list of English sentences.
    :return: What annotate_using_spacy() returns; per the return annotation,
        the annotated tokens of each sentence and the words of each sentence.
    """
    info(" › spaCy: annotating...")

    words_of_each_sentence = []
    for sentence in sentences:
        words_of_each_sentence.append(sentence.split(" "))

    return annotate_using_spacy(words_of_each_sentence, tokenize=True)
def __ccg_to_jigg_xml(CCG: List[List[Tuple[Tree]]],
                      annotated_sentences: List[List[Token]]) -> Any:
    """
    Serializes CCG trees to the Jigg XML format, using to_jigg_xml().

    :param CCG: The CCG trees of each sentence.
    :param annotated_sentences: The annotated tokens of each sentence.
    :return: The result of to_jigg_xml() for these trees and tokens.
    """
    info(" › ccg2lambda: CCG 🠦 CCG in Jigg XML")
    jigg_xml = to_jigg_xml(CCG, annotated_sentences)
    return jigg_xml
def close(self, clientside=False):
    """
    Logs the disconnection of this client, then closes its socket.

    :param clientside: True when the client disconnected by itself, False
        (the default) when the server is the one disconnecting it; only the
        log message depends on it.
    """
    outcome = ("was disconnected."
               if clientside else "was disconnected by the server.")
    info("Server:", self.coords, outcome)
    self.__socket.close()
def build(self):
    """
    Completes the query with the stored triples and sends the request.

    After filling the variables dictionary, every triple of self.__triples
    is appended to self.__query as " <first> <second> <third> .". The query
    is then announced and the answer of self.__request() is logged.

    Nothing is returned: the answer is only logged.
    """
    verbose("Filling variables dictionary...")
    self.__fill_dictionary()

    for triple in self.__triples:
        first, second, third = triple[0], triple[1], triple[2]
        clause = ' ' + first + ' ' + second + ' ' + third + ' .'
        self.__query += clause

    announce("Sending query: " + self.__query)

    answer = self.__request()
    info("The answer is: " + answer)
def nltk_to_query_objects(nltk_output: List[Expression]) -> List[Sentence]:
    """
    Converts objects created by NLTK after the ccg2lambda pipeline into
    Query Objects.

    Each expression is converted independently by __parse(), and the order
    of the input is preserved.

    :param nltk_output: The output from the ccg2lambda pipeline.
    :return: A list of Sentences, as Query Objects.
    """
    # The arrow and bullet of this message used to be mojibake (UTF-8 bytes
    # decoded with the wrong code page); they now match the other steps of
    # the pipeline.
    info(" › nltk2qo: nltk Python Objects 🠦 Query Objects")

    sentences = [__parse(sentence) for sentence in nltk_output]
    verbose("Conversion to Query Objects done.")

    # TODO: display some stats about the conversion (not implemented yet)
    return sentences
def server_logs(client, severity: str, *args: str):
    """
    Relays a log message received from the server to the local logger that
    matches its severity.

    "verbose" and "warning" messages are prefixed with "Server:", "info" and
    "error" messages are passed through unchanged. Any other severity is
    logged as verbose and followed by a warning.

    :param client: The client the message was received on (not used here).
    :param severity: One of "verbose", "info", "warning" or "error".
    :param args: The parts of the message to log.
    """
    if severity == "verbose":
        verbose("Server:", *args)
        return

    if severity == "info":
        info(*args)
        return

    if severity == "warning":
        warning('Server:', *args)
        return

    if severity == "error":
        error(*args)
        return

    # Unknown severity: keep the content, and report the anomaly.
    verbose("Server: unknown severity:", *args)
    warning("Received a message from the server, with unknown severity [",
            severity, "]")
def print_help():
    """
    Displays the help page: a title, then one line per available command.
    """
    announce('ccg2lambda QA Assistant: Help')

    help_lines = (
        ' › q, quit: \tQuits the software',
        ' › ?, h, help: \tDisplays this page',
        ' › v, verbose: \tEnables verbose mode',
        # noinspection SpellCheckingInspection
        ' › nv, noverbose:\tDisables verbose mode',
    )
    for line in help_lines:
        info(line)
def __init__(self, port: int, max_connections: int = 1):
    """
    Instantiates a new Server object: creates a TCP socket, binds it to the
    given port on every interface, and starts listening. No client is
    accepted here.

    :param port: The port the server should listen to.
    :param max_connections: Passed to socket.listen() as the backlog, i.e.
        the number of pending connections the system queues before refusing
        new ones.
    :exception Exception: whatever bind() or listen() raised (typically
        OSError when the port is unavailable); the socket is closed before
        the exception is propagated.
    """
    info("Server: Booting up...")
    self.clients = []
    self.commands = {}

    self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        self.socket.bind(("", port))
        self.socket.listen(max_connections)
    except Exception:
        # Do not leave the socket open when the server cannot start.
        self.socket.close()
        raise

    self.is_running = True
    info("Server: Connected on port", self.socket.getsockname()[1], "\n")

    # Close the server automatically when the program is killed externally
    atexit.register(self.kill)
def ask_client(options: List[Tuple[str, str, str, str]]) -> str:
    """
    Asks the client to choose between multiple Wikidata objects.

    With no option, an error is logged and "" is returned; with a single
    option, that option is returned without prompting the client. Otherwise
    the options are sent to the first connected client with the "choose"
    command.

    :param options: A list of quadruplets:
        (q-code, name, description, URL on Wikidata)
    :return: The q-code of the chosen option, or "" if no option was given.
    :exception Exception: if no client is connected.
    """
    if not options:
        error(
            "The server just requested to ask a question to the client, but didn't provide any possible answer! "
            "Skipping.")
        verbose("Requested a question without providing an answer")
        traceback.print_stack()
        return ""

    if len(options) == 1:
        warning(
            'Asking a question with only one possible answer, the client will not be prompted.'
        )
        return options[0][0]

    # Imported here rather than at the top of the file
    # (presumably to avoid a circular import; confirm).
    from network.server import server

    # One "index|name|description|url" entry per option; '|' and ' ' are
    # replaced by '~' inside the fields.
    choices = []
    for index, option in enumerate(options):
        fields = [
            field.replace('|', '~').replace(' ', '~') for field in option[1:4]
        ]
        choices.append(str(index) + "|" + "|".join(fields))
    verbose("I'm going to ask the client to choose between [", choices, "]")

    connected = len(server.clients)
    if connected == 0:
        error("No clients are connected at this point.")
        raise Exception("Trying to ask the client, but there's no client.")
    if connected > 1:
        warning("Currently,", connected,
                "are connected; the first one will be selected.")

    client = server.clients[0]
    verbose("The question will be asked to client [", client, "]")
    client.send("choose", *choices)
    client.receive_message()

    # NOTE(review): user_choice is not assigned in this function; presumably
    # it is a module-level variable updated while receive_message() handles
    # the answer of the client. Confirm.
    info('The client chose [', user_choice, ']')
    chosen_index = int(user_choice)
    return options[chosen_index][0]
def pretty_print(self):
    """
    Logs a readable overview of this Sentence: its question marker (if any),
    then each event with its tags, followed by the variables of that event.

    An error is logged first if some couples are still left in this
    sentence.
    """
    if len(self.__couples) > 0:
        error(
            "This sentence has not been fixed! There are still couples left: [",
            self.__couples, "]")

    if self.main is None:
        info(" question marker not found, this sentence is not a question!")
    else:
        info(" question marker:", self.main.id, "[", *self.main.tags, "]")

    info(" events:")
    for event in self.events:
        info(" - " + event.id + ":", "[", *event.tags, "]")
        for variable in event.variables:
            label, target = variable[0], variable[1]
            info(" - " + label + ":", target.id, "[", *target.tags, "]")
def convert(sentences: List[str], output_file=False) -> List[Expression]:
    """
    Converts a list of questions to an Abstract Syntax Tree (AST), using:
     - spaCy to annotate each word with its grammatical class,
     - depccg to convert the natural language to a CCG tree,
     - ccg2lambda to convert the CCG tree to a λ-expression,
     - nltk to parse that λ-expression into an AST represented in Python
       objects.

    :param sentences: a list of questions in English
    :param output_file: if True, the parsed sentences are also passed to
        visualisation.visualize() with the file name "sentences.html"
    :exception Exception: if less than 1 sentence is provided
    :return: a list of ASTs that each correspond to one of the given
        sentences (in the same order).
    """
    if len(sentences) < 1:
        raise Exception("Cannot run the pipeline with less than 1 sentence: " +
                        str(len(sentences)))

    announce("Beginning conversion of", len(sentences),
             "sentences, the first one is [", sentences[0], "]")

    annotated_sentences, split_sentences = __annotate_spacy(sentences)
    ccg_of_each_sentence = __convert_to_ccg(split_sentences)
    jigg_xml = __ccg_to_jigg_xml(ccg_of_each_sentence, annotated_sentences)
    formulas = __ccg_jigg_xml_to_lambda(jigg_xml)

    if output_file:
        # Done after the semantic parsing step, as in the original order of
        # operations. The log message names the file that is really passed
        # to visualize() (it used to mention sentences.sem.xml).
        visualisation_file = "sentences.html"
        info("Creating visualisation in file " + visualisation_file)
        visualisation.visualize(jigg_xml, visualisation_file)

    expr = __lambda_to_python(formulas)

    verbose("Conversion done.")
    return expr
def __init__(self, server, socket):
    """
    Wraps the socket of a client that has connected, and logs the connection.

    :param server: The server this client belongs to.
    :param socket: The connected socket of this client; its peer name is
        stored in self.coords.
    """
    self.__server = server
    self.__socket = socket
    self.coords = self.__socket.getpeername()
    info("Server:", self.__socket.getpeername(), "has connected.")