Пример #1
0
 def __init__(self):
     """Load the benchmark files and set up the experiment configuration.

     The benchmark records are read with ``FileIO.read_json_file`` and the
     stored result lines with ``FileIO.read_list_file``. Every record is
     split into its ``(query, entity)`` pair and its ``result`` value, kept
     in two parallel lists that share the record's index.
     """
     self.dataset = FileIO.read_json_file("sematch/benchmark-data/data.txt")
     self.results = FileIO.read_list_file("sematch/benchmark-data/result.txt")

     # Parallel lists: queries[i] and relevants[i] both come from dataset[i].
     self.queries = []
     self.relevants = []
     for record in self.dataset:
         self.queries.append((record['query'], record['entity']))
         self.relevants.append(record['result'])

     self.synsetExpansion = SynsetExpansion()
     self.engine = Engine()

     # Names of the similarity measures, thresholds and query patterns to use.
     # NOTE(review): presumably WordNet similarity metrics and graph-pattern
     # identifiers understood by Engine — confirm against those classes.
     self.sims = ['wup', 'lch', 'res', 'jcn', 'lin']
     self.thresholds = [0.9, 1]
     self.gpcs = ['gpc1', 'gpc2', 'gpc3', 'gpc4', 'gpc5', 'gpc6']
Пример #2
0
from flask import Flask, jsonify, json, request, render_template as template
from QueryEngine import Engine
import os

# Flask configuration values. They are loaded into app.config by the
# app.config.from_object(__name__) call below, which reads this module's
# upper-case attributes.
# NOTE(review): debug mode is switched on unconditionally — confirm this
# module is never deployed as-is.
DEBUG = True
# NOTE(review): hard-coded secret; presumably a development-only value — verify
# it is overridden outside development.
SECRET_KEY = 'Secret_development_key'

app = Flask(__name__)
app.config.from_object(__name__)

# Single module-level Engine instance used by the route handlers below.
engine = Engine()


@app.route('/api/queries')
def queries():
    """Handle ``/api/queries``: return the engine's query construction as JSON.

    The ``query`` request argument is passed unchanged to
    ``engine.type_entity_query_construction`` and whatever it returns is
    serialised with ``json.dumps``.
    """
    user_query = request.args.get('query')
    constructed = engine.type_entity_query_construction(user_query)
    return json.dumps(constructed)


@app.route('/api/types')
def types():
    """Handle ``/api/types``: return ``engine.types`` for the query as JSON.

    The ``query`` request argument is passed unchanged to ``engine.types``
    and the value it returns is serialised with ``json.dumps``.
    """
    user_query = request.args.get('query')
    found_types = engine.types(user_query)
    return json.dumps(found_types)


@app.route('/api/type_search')
def type_search():
    """Handle ``/api/type_search``: run ``engine.search_types`` for a type.

    Reads the ``type``, ``sim`` and ``th`` request arguments and calls
    ``engine.search_types(type, sim, float(th))``.

    NOTE(review): no return statement is visible after ``result`` is
    computed; the function body may continue beyond this excerpt — confirm.
    """
    # NOTE(review): the local name ``type`` shadows the builtin within this function.
    type = request.args.get('type')
    sim = request.args.get('sim')
    th = request.args.get('th')
    # float(th) raises if the ``th`` argument is missing or not numeric.
    result = engine.search_types(type, sim, float(th))
Пример #3
0
class Experiment:

    def __init__(self):
        """Load the benchmark files and set up the experiment configuration.

        The benchmark records are read with ``FileIO.read_json_file`` and the
        stored result lines with ``FileIO.read_list_file``. Every record is
        split into its ``(query, entity)`` pair and its ``result`` value, kept
        in two parallel lists that share the record's index.
        """
        self.dataset = FileIO.read_json_file("sematch/benchmark-data/data.txt")
        self.results = FileIO.read_list_file("sematch/benchmark-data/result.txt")

        # Parallel lists: queries[i] and relevants[i] both come from dataset[i].
        self.queries = []
        self.relevants = []
        for record in self.dataset:
            self.queries.append((record['query'], record['entity']))
            self.relevants.append(record['result'])

        self.synsetExpansion = SynsetExpansion()
        self.engine = Engine()

        # Names of the similarity measures, thresholds and query patterns to use.
        # NOTE(review): presumably WordNet similarity metrics and graph-pattern
        # identifiers understood by Engine — confirm against those classes.
        self.sims = ['wup', 'lch', 'res', 'jcn', 'lin']
        self.thresholds = [0.9, 1]
        self.gpcs = ['gpc1', 'gpc2', 'gpc3', 'gpc4', 'gpc5', 'gpc6']

    @staticmethod
    def measure(relevant, retrieved):
        a = len(relevant)
        b = len(retrieved)
        ab = 0
        for re in retrieved:
            if re in relevant:
                ab += 1
        recall = float(ab) / float(a)
        if b == 0:
            precision = 0
        else:
            precision = float(ab) / float(b)
        if precision + recall == 0:
            f = 0
        else:
            f = 2 * (precision * recall) / (precision + recall)
        return recall, precision , f

    def query_info(self, id):
        tQ, eURI = self.queries[id]
        relevant_data = self.relevants[id]
        print id, '\tquery: ', ' '.join(self.dataset[id]['terms'])
        print tQ, eURI
        print 'N(relevant)=', len(relevant_data)

    def print_query(self, id, gpc, sim, th):
        tQ, eURI = self.queries[id]
        tURIs = self.typeExpansion.expandType(tQ, sim, th)
        print "Number(types)=", len(tURIs)
        query = self.engine.query(gpc, tURIs, eURI)
        #result = self.engine.sparql.execute(query)
        result = self.engine.sparql.request_execution(query)
        print result

    def experiment(self, id, gpcs, sim, th):
        tQ, eURI = self.queries[id]
        relevant_data = self.relevants[id]
        tURIs = self.typeExpansion.expandType(tQ, sim, th)
        result = self.engine.run(gpcs,tURIs, eURI)
        #print sim, th, gpcs
        #print "N(type uris)=",len(tURIs)
        #print 'N(returned)=', len(result)
        #print 'recall is %f, precison is %f, f measure is %f' % Experiment.measure(relevant_data,result)
        print Experiment.measure(relevant_data,result)

    def analysis(self):
        data = self.results
        data = [d.lstrip('(') for d in data]
        data = [d.rstrip(')') for d in data]
        data = [d.split(',') for d in data]
        data = [map(float, d) for d in data]
        data = [data[i:i+20] for i in range(0,len(data),20)]
        sum_data = data[0]
        for d in data[1:]:
            for i in range(20):
                sum_data[i][0] += d[i][0]
                sum_data[i][1] += d[i][1]
                sum_data[i][2] += d[i][2]

        for i in range(20):
            print i+1, '***************'
            print sum_data[i][0] / 22.0
            print sum_data[i][1] / 22.0
            print '***************'