Пример #1
0
    def prepare_double_relation_classifier_dataset(self, file_path=None):
        """Build a dataset for detecting questions that repeat a relation.

        Args:
            file_path: Optional path handed to ``LC_Qaud``. When ``None`` the
                dataset is constructed without arguments.

        Returns:
            A ``(X, y)`` pair of parallel lists. ``X`` holds the question
            texts; ``y`` holds ``1`` when the ontology/type URIs of the
            question's SPARQL query contain a duplicate, otherwise ``0``.
        """
        # Honour the caller-supplied path. The previous code ignored the
        # argument in this branch and read ``self.filepath`` instead.
        ds = LC_Qaud() if file_path is None else LC_Qaud(file_path)
        ds.load()
        ds.parse()

        X = []
        y = []
        for qapair in ds.qapairs:
            X.append(qapair.question.text)
            relation_uris = [u for u in qapair.sparql.uris if u.is_ontology() or u.is_type()]
            # A set collapses equal URIs, so a shorter set means a repeat.
            y.append(1 if len(relation_uris) != len(set(relation_uris)) else 0)

        return X, y
Пример #2
0
    def prepare_question_classifier_dataset(self, file_path=None):
        """Build a dataset that labels each question by its query form.

        Args:
            file_path: Optional path handed to ``LC_Qaud``. When ``None`` the
                dataset is constructed without arguments.

        Returns:
            A ``(questions, labels)`` pair of parallel lists. The label is
            ``2`` when the raw SPARQL query contains ``COUNT(``, ``1`` when it
            contains ``ASK WHERE``, and ``0`` otherwise.
        """
        dataset = LC_Qaud() if file_path is None else LC_Qaud(file_path)
        dataset.load()
        dataset.parse()

        questions, labels = [], []
        for pair in dataset.qapairs:
            questions.append(pair.question.text)
            # The COUNT check comes first, so it wins over ASK.
            if "COUNT(" in pair.sparql.raw_query:
                label = 2
            elif "ASK WHERE" in pair.sparql.raw_query:
                label = 1
            else:
                label = 0
            labels.append(label)

        return questions, labels
Пример #3
0

if __name__ == "__main__":
    # Command-line entry point: load a result file, optionally collect the
    # question ids of a filter dataset, and print what default() returns.
    parser = argparse.ArgumentParser(
        description='Analyse the output of query generator')
    parser.add_argument("--file",
                        help="file name to load the results",
                        default="tmp",
                        dest="file_name")
    parser.add_argument("--filter",
                        help="file name to filter the results",
                        default="-",
                        dest="filter_name")
    args = parser.parse_args()

    ds_1 = load_ds(args.file_name)

    # "-" is the sentinel for "no filter"; the id list then stays empty.
    id_to_include = []
    if args.filter_name != "-":
        ds = LC_Qaud(args.filter_name)
        ds.load()
        ds.parse()
        id_to_include = [item.id for item in ds.qapairs]

    # Parenthesised single-argument print: same output as the Python 2
    # print statement, and also valid Python 3.
    print(default(ds_1, id_to_include))

    # ds_1 = load_ds("wq_14")
    # print default(ds_1)
    # bar_chart_per_feature(ds_1)
Пример #4
0
    return r.status_code, r.json()


def has_answer(t):
    """Tell whether a query response mapping carries an answer.

    Args:
        t: Mapping checked for the keys ``"results"`` and ``"boolean"``
            (presumably a decoded SPARQL JSON response; confirm with the
            caller).

    Returns:
        ``True`` when ``t["results"]["bindings"]`` is non-empty or when a
        ``"boolean"`` key is present, whatever its value; ``False`` otherwise.
    """
    has_bindings = "results" in t and len(t["results"]["bindings"]) > 0
    return has_bindings or "boolean" in t


if __name__ == "__main__":
    # Script entry point: run the SPARQL query of every QA pair in the
    # dataset and store the outcome in a per-question row.
    # NOTE(review): `print e` and the bare `print` below are Python 2
    # statements; this snippet does not parse under Python 3.
    # print query("SELECT DISTINCT ?uri WHERE {?uri <http://dbpedia.org/ontology/developer> <http://dbpedia.org/resource/J._Michael_Straczynski> . ?uri <http://dbpedia.org/property/network> <http://dbpedia.org/resource/TNT_(TV_channel)>  . ?uri <https://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/TelevisionShow>}".replace("/property", "/ontology"))
    i = 0
    # ds = LC_Qaud_Linked(path="./data/LC-QUAD/linked.json")
    ds = LC_Qaud()
    tmp = []
    no_answer = 0
    no_entity = 0
    for qapair in prepare_dataset(ds).qapairs:
        # One row per QA pair; id and question are stored as strings.
        raw_row = dict()
        raw_row["id"] = qapair.id.__str__()
        raw_row["question"] = qapair.question.__str__()
        raw_row["sparql_query"] = qapair.sparql.query
        try:
            # Element 1 of query()'s result is kept as the answers
            # (presumably the decoded JSON body; confirm against query()).
            r = query(qapair.sparql.query)
            raw_row["answers"] = r[1]
        except Exception as e:
            # Best effort: a failed query leaves an empty answer list and the
            # error is printed, followed by a blank line.
            raw_row["answers"] = []
            print e
            print
Пример #5
0
if __name__ == "__main__":
    # Script entry point: merge the train and test JSON files into one file,
    # load it as an LC_Qaud dataset, and run the SPARQL query of each QA pair.

    with open('data/LC-QUAD/train-data.json', 'r', encoding='utf-8') as f:
        train = json.load(f)

    with open('data/LC-QUAD/test-data.json', 'r', encoding='utf-8') as f:
        test = json.load(f)

    # Concatenate both splits (assumes each file holds a JSON array; confirm).
    data = train + test
    print('data len: ', len(data))

    # Write the merged data to the path that LC_Qaud is pointed at below.
    with open("data/LC-QUAD/data.json", "w") as write_file:
        json.dump(data, write_file)

    ds = LC_Qaud(path="./data/LC-QUAD/data.json")
    tmp = []
    for qapair in prepare_dataset(ds).qapairs:
        # One row per QA pair: id as string, question text, SPARQL query.
        raw_row = dict()
        raw_row["id"] = qapair.id.__str__()
        raw_row["question"] = qapair.question.text
        raw_row["sparql_query"] = qapair.sparql.query
        try:
            # Element 1 of query()'s result is kept as the answers
            # (presumably the decoded JSON body; confirm against query()).
            r = query(qapair.sparql.query)
            raw_row["answers"] = r[1]
        except Exception as e:
            # Best effort: any failure silently leaves an empty answer list.
            raw_row["answers"] = []

        tmp.append(raw_row)

    # NOTE(review): the body of this `with` is not visible in this excerpt.
    with open('data/LC-QUAD/linked_answer.json', 'w') as jsonFile:
Пример #6
0
        yield triples
    else:
        while idx >= 0:
            yield template[0:idx]
            template = template[idx + 1:]
            idx = findnth(template, " ", 2)
        yield template


if __name__ == "__main__":
    # Tally how often each WHERE-clause template occurs among the supported
    # queries, then rank the templates by that count.
    WHERE = "WHERE"
    total = 0
    templates = {}
    # Qald(Qald.qald_6) LC_Qaud WebQSP
    for item in prepare_dataset(
            LC_Qaud("../data/LC-QUAD/linked_3200.json")).qapairs:
        if item.sparql.supported:
            where_clause = item.sparql.where_clause_template
            # First sighting starts at 1; later sightings add to the count.
            templates[where_clause] = templates.get(where_clause, 0) + 1
            total += 1
    # if total > 100:
    #     break

    # Most frequently used templates first.
    sorted_templates = sorted(templates.items(),
                              key=lambda entry: entry[1],
                              reverse=True)
Пример #7
0
from sklearn.model_selection import train_test_split
from parser.webqsp import WebQSP
from parser.lc_quad import LC_Qaud
from lsa.dssm import DSSM
from common.preprocessing.preprocessor import Preprocessor
from common.graph.graph import Graph
from linker.jerrl import Jerrl


def qapairs_to_triple(qapairs):
    """Flatten QA pairs into plain dictionaries.

    Args:
        qapairs: Iterable of objects exposing ``id``, ``question.text``,
            ``sparql.raw_query`` and ``sparql.uris``.

    Returns:
        A list with one dict per pair, keyed ``"id"``, ``"question"``,
        ``"query"`` and ``"uris"``, in input order.
    """
    rows = []
    for pair in qapairs:
        rows.append({
            "id": pair.id,
            "question": pair.question.text,
            "query": pair.sparql.raw_query,
            "uris": pair.sparql.uris,
        })
    return rows


# Module-level setup: create the Jerrl instance and load/parse the LC_Qaud
# dataset.
jerrl = Jerrl()
ds = LC_Qaud()
# The knowledge-base handle is read from the dataset's parser before load().
kb = ds.parser.kb
ds.load()
ds.parse()

# ds_train, ds_test, _, _ = train_test_split(ds.qapairs, [1] * len(ds.qapairs), test_size=0.2)

# Fixed positional split: the first 4000 QA pairs train, the remainder test.
ds_train = ds.qapairs[:4000]
ds_test = ds.qapairs[4000:]

# Replace both partitions with the output of qapairs_to_triple.
ds_train = qapairs_to_triple(ds_train)
ds_test = qapairs_to_triple(ds_test)

model = DSSM(max_steps=10)
# questions, queries, ids = Preprocessor.qapair_to_hash(ds_train)
# model.train([questions, queries])
Пример #8
0
    if idx == -1:
        yield triples
    else:
        while idx >= 0:
            yield template[0:idx]
            template = template[idx + 1:]
            idx = findnth(template, " ", 2)
        yield template


if __name__ == "__main__":
    # Tally how often each WHERE clause occurs among the supported queries,
    # then rank the clauses by that count.
    WHERE = "WHERE"
    total = 0
    templates = {}
    # Qald(Qald.qald_6) LC_Qaud WebQSP
    for item in prepare_dataset(LC_Qaud()).qapairs:
        if item.sparql.supported:
            where_clause = item.sparql.where_clause
            # First sighting starts at 1; later sightings add to the count.
            templates[where_clause] = templates.get(where_clause, 0) + 1
            total += 1
    # if total > 100:
    #     break

    # Most frequently used clauses first.
    sorted_templates = sorted(templates.items(),
                              key=lambda entry: entry[1],
                              reverse=True)
Пример #9
0
def get_entries(name, path):
    """Yield the DBpedia URIs of every matching entry in a log file.

    An entry starts at a line containing ``http://www.wdaqua.eu/qa#`` followed
    by ``name`` and runs up to the next blank line (or end of file).

    Args:
        name: Suffix appended to the ``http://www.wdaqua.eu/qa#`` marker.
        path: Path of the text file to scan.

    Yields:
        For each entry, the list of ``http://dbpedia...`` URIs found between
        angle brackets in the entry's text.
    """
    with open(path) as handle:
        for line in handle:
            if "http://www.wdaqua.eu/qa#" + name not in line:
                continue
            # Pull the rest of the entry from the same iterator; takewhile
            # also consumes the terminating blank line.
            entry_lines = [line] + list(takewhile(str.strip, handle))
            yield re.findall(r'<(http://dbpedia[^>]+)>', ''.join(entry_lines))


if __name__ == "__main__":
    base_dir = "../data/"
    rel_dir_name = os.path.join(base_dir, "relnliodLogs")
    ned_dir_name = os.path.join(base_dir, "tagmeNEDlogs")

    ds = LC_Qaud("../data/LC-QUAD/linked_3200.json")
    ds.load()
    ds.parse()

    i = 0
    input_files = os.listdir(rel_dir_name)
    input_files.sort()
    dataset = []
    q = 0
    for name in tqdm(input_files):
        # print i
        relations = list(
            get_entries("AnnotationOfRelation",
                        os.path.join(rel_dir_name, name)))
        if len(relations) > 0:
            relations = [{