def extract_domain(top_domain_file, full_extract=True):
    """
    Extract a dialogue domain from the XML specification.

    :param top_domain_file: the filename of the top XML file
    :param full_extract: whether to extract the full domain or only the files
    :return: the extracted dialogue domain
    """
    # create a new, empty domain and record the file it is read from
    domain = Domain()
    domain.set_source_file(Path(top_domain_file))

    # extract the XML document; the context manager guarantees that the file
    # handle is released even when parsing fails
    with open(top_domain_file, 'r') as fl:
        document = XMLUtils.get_xml_document(fl)
        main_node = XMLUtils.get_main_node(document)

    # the directory of the top file is handed down to the partial extraction
    # so that references to other files can be handled
    root_path = Path(top_domain_file).parent

    for child in main_node:
        domain = XMLDomainReader.extract_partial_domain(
            child, domain, root_path, full_extract)

    return domain
def extract_bayesian_network_from_string(full_string):
    """
    Builds a Bayesian network from XML content given as a string.

    The main node is used if it is itself a 'state' element; otherwise the
    first direct child tagged 'state' is used. When neither exists, an empty
    network is returned.

    :param full_string: the string containing the initial state content
    :return: the corresponding Bayesian network
    """
    # parse the string through an in-memory text stream
    root = XMLUtils.get_main_node(
        XMLUtils.get_xml_document(io.StringIO(full_string)))

    if root.tag != 'state':
        # look for the first direct child carrying the 'state' tag
        state_node = next(
            (candidate for candidate in root if candidate.tag == 'state'),
            None)
        if state_node is None:
            return BNetwork()
    else:
        state_node = root

    return XMLStateReader.get_bayesian_network(state_node)
def extract_dialogue(data_file):
    """
    Extracts the dialogue specified in the data file. The result is a list
    of dialogue states (one for each turn).

    The children of the main node are handled according to their tag:

    * a tag containing "Turn" creates a new dialogue state from the node; for
      a "systemTurn" whose state has a chance node "a_m", the best value of
      "a_m" is added to that state as evidence;
    * "wizard" parses the text of the node as an assignment and adds it as
      evidence to the most recently extracted state;
    * "import" extracts the dialogue of the referenced file (resolved
      relative to the directory of ``data_file``) and appends its states.

    :param data_file: the XML file containing the turns
    :return: the list of dialogue states
    :raises IndexError: if a "wizard" node occurs before any turn
    :raises KeyError: if an "import" node has no "href" attribute
    """
    doc = XMLUtils.get_xml_document(data_file)
    main_node = XMLUtils.get_main_node(doc)

    # imports are resolved against the directory containing the data file;
    # no file handle is needed to compute it
    root_path = Path(data_file).parent

    sample = []
    for node in main_node:
        # the element name is the tag (keys() would list attribute names)
        node_name = node.tag

        if "Turn" in node_name:
            state = DialogueState(XMLStateReader.get_bayesian_network(node))
            sample.append(state)

            if node_name == "systemTurn" and state.has_chance_node("a_m"):
                assign = Assignment(
                    "a_m", state.query_prob("a_m").get_best())
                state.add_evidence(assign)

        elif node_name == "wizard":
            # the assignment is given as the text content of the element
            assign = Assignment.create_from_string(node.text.strip())
            sample[-1].add_evidence(assign)

        elif node_name == "import":
            # NOTE(review): the href is read from the <import> element itself
            # (the original looked at the main node) -- confirm against the
            # data file format
            file_name = node.attrib["href"]
            points = XMLDialogueReader.extract_dialogue(
                str(root_path / file_name))
            # merge the imported states instead of nesting a list
            sample.extend(points)

    return sample
def extract_bayesian_network(file, tag):
    """
    Reads the Bayesian network stored under the given tag of an XML document
    (e.g. the initial state or the parameters of a domain whose variable
    types are already declared).

    The main node is used if it carries the tag; otherwise the first direct
    child carrying the tag is used.

    :param file: the file to process
    :param tag: the XML tag to search for
    :return: the specified Bayesian network
    :raises ValueError: if neither the main node nor any of its direct
        children has the requested tag
    """
    # parse the XML document and locate its main node
    root = XMLUtils.get_main_node(XMLUtils.get_xml_document(file))

    # prefer the main node itself, then fall back to its direct children
    target = root if root.tag == tag else next(
        (candidate for candidate in root if candidate.tag == tag), None)

    if target is None:
        raise ValueError()

    return XMLStateReader.get_bayesian_network(target)
def extract_partial_domain(main_node, domain, root_path, full_extract):
    """
    Extracts a partially specified domain from the XML node and adds its
    content to the dialogue domain.

    'domain' nodes are processed recursively and 'import' nodes pull in the
    referenced file (registered on the domain as an imported file). All other
    tags ('settings', 'function', 'initialstate', 'model', 'parameters') are
    only processed when ``full_extract`` is true.

    :param main_node: main XML node
    :param domain: dialogue domain
    :param root_path: root path (necessary to handle references)
    :param full_extract: whether to extract the full domain or only the files
    :return: the augmented dialogue domain
    :raises ValueError: if an imported reference cannot be processed
    """
    tag = main_node.tag

    if tag == 'domain':
        # extracting rule-based probabilistic model
        for child in main_node:
            domain = XMLDomainReader.extract_partial_domain(
                child, domain, root_path, full_extract)
    elif tag == 'import':
        # extracting imported references
        try:
            file_name = main_node.attrib['href']
            file_path = str(root_path) + os.sep + file_name
            domain.add_imported_files(Path(file_path))

            sub_document = XMLUtils.get_xml_document(file_path)
            domain = XMLDomainReader.extract_partial_domain(
                XMLUtils.get_main_node(sub_document), domain, root_path,
                full_extract)
        except Exception as exc:
            # callers still receive a ValueError, but the original cause is
            # chained and KeyboardInterrupt/SystemExit are no longer masked
            raise ValueError(
                'could not process imported file {!r}'.format(
                    main_node.attrib.get('href'))) from exc

    if not full_extract:
        return domain

    # the tags below are mutually exclusive, so at most one branch applies
    if tag == 'settings':
        # extracting settings
        settings = XMLUtils.extract_mapping(main_node)
        domain.get_settings().fill_settings(settings)
    elif tag == 'function':
        # extracting custom functions: the node text is a dotted path of the
        # form "<module>.<function>"; surrounding whitespace is stripped so
        # that indented or wrapped XML content still resolves
        domain_function_name = main_node.attrib['name'].strip()
        module_name, actual_function_name = \
            main_node.text.strip().rsplit('.', 1)
        mod = importlib.import_module(module_name)
        func = getattr(mod, actual_function_name)
        domain.get_settings().add_function(domain_function_name, func)
    elif tag == 'initialstate':
        # extracting initial state
        state = XMLStateReader.get_bayesian_network(main_node)
        domain.set_initial_state(DialogueState(state))
    elif tag == 'model':
        # extracting rule-based probabilistic model
        model = XMLDomainReader._create_model(main_node)
        domain.add_model(model)
    elif tag == 'parameters':
        # extracting parameters
        parameters = XMLStateReader.get_bayesian_network(main_node)
        domain.set_parameters(parameters)

    if XMLUtils.has_content(main_node):
        # TODO: this compares the node object itself with a string, which
        # looks like it can never be true; presumably main_node.tag was
        # intended -- confirm before changing
        if main_node == '#text':
            raise ValueError()

    return domain