def prepare_double_relation_classifier_dataset(self, file_path=None):
    """Build a binary dataset flagging questions whose query repeats a relation.

    Loads LC-QuAD (from ``file_path`` when given, otherwise the parser's
    default location) and, for every question/answer pair, emits the
    question text together with a label:

    * 1 — some ontology/type URI occurs more than once in the SPARQL query
      (a "double relation" question);
    * 0 — all relation URIs are distinct.

    Args:
        file_path: optional path to an LC-QuAD JSON file.

    Returns:
        Tuple ``(X, y)`` of parallel lists: question strings and 0/1 labels.
    """
    # BUG FIX: the original passed `self.filepath` here, silently ignoring
    # the `file_path` argument (`self.filepath` is not set anywhere in
    # view). Use the parameter, mirroring
    # prepare_question_classifier_dataset.
    if file_path is None:
        ds = LC_Qaud()
    else:
        ds = LC_Qaud(file_path)
    ds.load()
    ds.parse()

    X = []
    y = []
    for qapair in ds.qapairs:
        X.append(qapair.question.text)
        # Relations are the ontology/type URIs of the query; a duplicate
        # entry means the same relation is used twice.
        relation_uris = [u for u in qapair.sparql.uris
                         if u.is_ontology() or u.is_type()]
        if len(relation_uris) != len(set(relation_uris)):
            y.append(1)
        else:
            y.append(0)
    return X, y
def prepare_question_classifier_dataset(self, file_path=None):
    """Build a three-way question-type dataset from LC-QuAD.

    Labels each question by the shape of its SPARQL query:
    2 for COUNT queries, 1 for ASK (boolean) queries, 0 for plain SELECTs.

    Args:
        file_path: optional path to an LC-QuAD JSON file.

    Returns:
        Tuple ``(X, y)`` of parallel lists: question strings and labels
        in ``{0, 1, 2}``.
    """
    ds = LC_Qaud() if file_path is None else LC_Qaud(file_path)
    ds.load()
    ds.parse()

    questions = []
    labels = []
    for pair in ds.qapairs:
        questions.append(pair.question.text)
        raw = pair.sparql.raw_query
        if "COUNT(" in raw:
            labels.append(2)
        elif "ASK WHERE" in raw:
            labels.append(1)
        else:
            labels.append(0)
    return questions, labels
if __name__ == "__main__": parser = argparse.ArgumentParser( description='Analyse the output of query generator') parser.add_argument("--file", help="file name to load the results", default="tmp", dest="file_name") parser.add_argument("--filter", help="file name to filter the results", default="-", dest="filter_name") args = parser.parse_args() ds_1 = load_ds(args.file_name) id_to_include = [] if args.filter_name != "-": ds = LC_Qaud(args.filter_name) ds.load() ds.parse() for item in ds.qapairs: id_to_include.append(item.id) print default(ds_1, id_to_include) # ds_1 = load_ds("wq_14") # print default(ds_1) # bar_chart_per_feature(ds_1)
# NOTE(review): the `return` below is the tail of a function whose `def`
# line falls outside this chunk (presumably an HTTP `query` helper — `r`
# looks like a `requests` response; TODO confirm). Indentation here is
# reconstructed; the tokens are untouched.
    return r.status_code, r.json()


def has_answer(t):
    # True when a SPARQL JSON response carries any usable result: either
    # a non-empty SELECT binding list or an ASK boolean field.
    if "results" in t and len(t["results"]["bindings"]) > 0:
        return True
    if "boolean" in t:
        return True
    return False


if __name__ == "__main__":
    # print query("SELECT DISTINCT ?uri WHERE {?uri <http://dbpedia.org/ontology/developer> <http://dbpedia.org/resource/J._Michael_Straczynski> . ?uri <http://dbpedia.org/property/network> <http://dbpedia.org/resource/TNT_(TV_channel)> . ?uri <https://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/TelevisionShow>}".replace("/property", "/ontology"))
    i = 0
    # ds = LC_Qaud_Linked(path="./data/LC-QUAD/linked.json")
    ds = LC_Qaud()
    tmp = []
    no_answer = 0
    no_entity = 0
    # Run every parsed question's SPARQL query against the endpoint and
    # record the raw answers per question.
    for qapair in prepare_dataset(ds).qapairs:
        raw_row = dict()
        raw_row["id"] = qapair.id.__str__()
        raw_row["question"] = qapair.question.__str__()
        raw_row["sparql_query"] = qapair.sparql.query
        try:
            r = query(qapair.sparql.query)
            raw_row["answers"] = r[1]
        except Exception as e:
            # Best-effort: a failed query yields an empty answer list.
            raw_row["answers"] = []
            # Python 2 print statements; this chunk is truncated here —
            # the rest of the loop body lies outside this view.
            print e
            print
if __name__ == "__main__":
    # Merge the LC-QuAD train and test splits into one file, then query
    # the endpoint for each question and collect the answers.
    with open('data/LC-QUAD/train-data.json', 'r', encoding='utf-8') as f:
        train = json.load(f)
    with open('data/LC-QUAD/test-data.json', 'r', encoding='utf-8') as f:
        test = json.load(f)
    data = train + test
    print('data len: ', len(data))
    with open("data/LC-QUAD/data.json", "w") as write_file:
        json.dump(data, write_file)

    ds = LC_Qaud(path="./data/LC-QUAD/data.json")
    tmp = []
    for qapair in prepare_dataset(ds).qapairs:
        raw_row = dict()
        raw_row["id"] = qapair.id.__str__()
        raw_row["question"] = qapair.question.text
        raw_row["sparql_query"] = qapair.sparql.query
        try:
            r = query(qapair.sparql.query)
            raw_row["answers"] = r[1]
        except Exception as e:
            # Best-effort: a failed query yields an empty answer list.
            raw_row["answers"] = []
        tmp.append(raw_row)

    # NOTE(review): chunk truncated here — the body of this `with` (the
    # json dump of `tmp`, presumably) lies outside this view.
    with open('data/LC-QUAD/linked_answer.json', 'w') as jsonFile:
# NOTE(review): the fragment below is the tail of a generator whose `def`
# and opening `if` fall outside this chunk (see the sibling chunk that
# starts `if idx == -1:`). Indentation reconstructed; tokens untouched.
        yield triples
    else:
        # Emit the template in chunks of two space-separated tokens.
        while idx >= 0:
            yield template[0:idx]
            template = template[idx + 1:]
            idx = findnth(template, " ", 2)
        yield template


if __name__ == "__main__":
    # Count how often each WHERE-clause template occurs in the dataset.
    WHERE = "WHERE"
    total = 0
    templates = {}
    # Qald(Qald.qald_6) LC_Qaud WebQSP
    for item in prepare_dataset(
            LC_Qaud("../data/LC-QUAD/linked_3200.json")).qapairs:
        if not item.sparql.supported:
            continue
        where_clause = item.sparql.where_clause_template
        # Increment the per-template counter (0 when unseen).
        templates[where_clause] = 1 + (templates[where_clause] if where_clause in templates else 0)
        total += 1
        # if total > 100:
        #     break
    # List templates based on number of times they have been used
    sorted_templates = sorted(templates.items(), key=operator.itemgetter(1), reverse=True)
from sklearn.model_selection import train_test_split
from parser.webqsp import WebQSP
from parser.lc_quad import LC_Qaud
from lsa.dssm import DSSM
from common.preprocessing.preprocessor import Preprocessor
from common.graph.graph import Graph
from linker.jerrl import Jerrl


def qapairs_to_triple(qapairs):
    """Project each QA pair onto a plain dict of id/question/query/uris."""
    rows = []
    for item in qapairs:
        rows.append({
            "id": item.id,
            "question": item.question.text,
            "query": item.sparql.raw_query,
            "uris": item.sparql.uris,
        })
    return rows


jerrl = Jerrl()

# Load and parse the full LC-QuAD dataset.
ds = LC_Qaud()
kb = ds.parser.kb
ds.load()
ds.parse()

# Fixed head/tail split at 4000 pairs (the random split is kept below
# for reference).
# ds_train, ds_test, _, _ = train_test_split(ds.qapairs, [1] * len(ds.qapairs), test_size=0.2)
ds_train = qapairs_to_triple(ds.qapairs[:4000])
ds_test = qapairs_to_triple(ds.qapairs[4000:])

model = DSSM(max_steps=10)
# questions, queries, ids = Preprocessor.qapair_to_hash(ds_train)
# model.train([questions, queries])
# NOTE(review): the fragment below is the tail of a generator whose `def`
# line falls outside this chunk. Indentation reconstructed; tokens
# untouched.
    if idx == -1:
        yield triples
    else:
        # Emit the template in chunks of two space-separated tokens.
        while idx >= 0:
            yield template[0:idx]
            template = template[idx + 1:]
            idx = findnth(template, " ", 2)
        yield template


if __name__ == "__main__":
    # Count how often each WHERE clause occurs in the dataset.
    WHERE = "WHERE"
    total = 0
    templates = {}
    # Qald(Qald.qald_6) LC_Qaud WebQSP
    for item in prepare_dataset(LC_Qaud()).qapairs:
        if not item.sparql.supported:
            continue
        where_clause = item.sparql.where_clause
        # Increment the per-clause counter (0 when unseen).
        templates[where_clause] = 1 + (templates[where_clause] if where_clause in templates else 0)
        total += 1
        # if total > 100:
        #     break
    # List templates based on number of times they have been used
    sorted_templates = sorted(templates.items(), key=operator.itemgetter(1), reverse=True)
def get_entries(name, path):
    # Scan an annotation log file for "http://www.wdaqua.eu/qa#<name>"
    # markers; for each hit, read the following lines up to the first
    # blank line and yield every DBpedia URI found in that stanza.
    with open(path) as file:
        for line in file:
            if "http://www.wdaqua.eu/qa#" + name in line:
                buf = [line]
                buf.extend(takewhile(str.strip, file))  # read until blank line
                yield re.findall(r'<(http://dbpedia[^>]+)>', ''.join(buf))


if __name__ == "__main__":
    # Pair relation-linker logs (relnliodLogs) with NED logs
    # (tagmeNEDlogs) against the linked LC-QuAD dataset.
    base_dir = "../data/"
    rel_dir_name = os.path.join(base_dir, "relnliodLogs")
    ned_dir_name = os.path.join(base_dir, "tagmeNEDlogs")

    ds = LC_Qaud("../data/LC-QUAD/linked_3200.json")
    ds.load()
    ds.parse()

    i = 0
    # Sort so log files are visited in a deterministic order.
    input_files = os.listdir(rel_dir_name)
    input_files.sort()
    dataset = []
    q = 0
    for name in tqdm(input_files):
        # print i
        relations = list(
            get_entries("AnnotationOfRelation", os.path.join(rel_dir_name, name)))
        if len(relations) > 0:
            # NOTE(review): chunk truncated here — the remainder of this
            # list literal lies outside this view.
            relations = [{