def __init__(self):
    """Load the benchmark data and wire up the engine plus sweep parameters."""
    # Benchmark queries (JSON records) and previously computed result lines.
    self.dataset = FileIO.read_json_file("sematch/benchmark-data/data.txt")
    self.results = FileIO.read_list_file("sematch/benchmark-data/result.txt")
    # Split each record into its (type-query, entity-URI) pair and its
    # gold-standard relevant-result set.
    queries = []
    relevants = []
    for record in self.dataset:
        queries.append((record['query'], record['entity']))
        relevants.append(record['result'])
    self.queries = queries
    self.relevants = relevants
    self.synsetExpansion = SynsetExpansion()
    self.engine = Engine()
    # Similarity metrics, expansion thresholds and graph-pattern configurations
    # that the experiments sweep over.
    self.sims = ['wup', 'lch', 'res', 'jcn', 'lin']
    self.thresholds = [0.9, 1]
    self.gpcs = ['gpc1', 'gpc2', 'gpc3', 'gpc4', 'gpc5', 'gpc6']
# Flask web API exposing the sematch query engine over HTTP.
from flask import Flask, jsonify, json, request, render_template as template
from QueryEngine import Engine
import os

DEBUG = True
SECRET_KEY = 'Secret_development_key'

app = Flask(__name__)
# Pull DEBUG / SECRET_KEY from this module's uppercase globals.
app.config.from_object(__name__)

# Single shared engine instance used by all request handlers.
engine = Engine()


@app.route('/api/queries')
def queries():
    # Build type/entity query constructions for the raw query string.
    query = request.args.get('query')
    return json.dumps(engine.type_entity_query_construction(query))


@app.route('/api/types')
def types():
    # Look up candidate types for the query string.
    query = request.args.get('query')
    return json.dumps(engine.types(query))


@app.route('/api/type_search')
def type_search():
    # Search entities by type URI, similarity metric and numeric threshold.
    type = request.args.get('type')  # NOTE(review): shadows the builtin `type`
    sim = request.args.get('sim')
    th = request.args.get('th')
    result = engine.search_types(type, sim, float(th))
    # NOTE(review): no return statement is visible here -- this view appears
    # truncated at the chunk boundary; presumably it should return the
    # serialized `result`. Confirm against the full file.
class Experiment:
    """Benchmark experiments for type-expansion based entity search.

    Loads the benchmark dataset and gold-standard results from disk, runs
    queries through the engine with different similarity metrics / thresholds /
    graph-pattern configurations, and scores the retrieved entities with
    recall, precision and F-measure.
    """

    def __init__(self):
        # Benchmark queries (JSON records) and previously computed result lines.
        self.dataset = FileIO.read_json_file("sematch/benchmark-data/data.txt")
        self.results = FileIO.read_list_file("sematch/benchmark-data/result.txt")
        # (type-query, entity-URI) pairs and their gold-standard relevant sets.
        self.queries = [(d['query'], d['entity']) for d in self.dataset]
        self.relevants = [d['result'] for d in self.dataset]
        self.synsetExpansion = SynsetExpansion()
        self.engine = Engine()
        # Similarity metrics, expansion thresholds and graph-pattern configs to sweep.
        self.sims = ['wup', 'lch', 'res', 'jcn', 'lin']
        self.thresholds = [0.9, 1]
        self.gpcs = ['gpc1', 'gpc2', 'gpc3', 'gpc4', 'gpc5', 'gpc6']

    @staticmethod
    def measure(relevant, retrieved):
        """Score a retrieved result list against the relevant (gold) list.

        Returns a (recall, precision, f_measure) tuple of floats. Empty
        `relevant` or `retrieved` lists yield 0.0 for the affected metric
        instead of raising ZeroDivisionError (robustness fix; the original
        crashed when `relevant` was empty).
        """
        n_relevant = len(relevant)
        n_retrieved = len(retrieved)
        # Count retrieved items that are relevant; duplicates in `retrieved`
        # are counted each time, matching the original behaviour.
        hits = 0
        for item in retrieved:
            if item in relevant:
                hits += 1
        recall = float(hits) / n_relevant if n_relevant else 0.0
        precision = float(hits) / n_retrieved if n_retrieved else 0.0
        denom = precision + recall
        f = 2 * precision * recall / denom if denom else 0.0
        return recall, precision, f

    def query_info(self, id):
        """Print a short human-readable summary of benchmark query *id*."""
        tQ, eURI = self.queries[id]
        relevant_data = self.relevants[id]
        # print(...) with a single %-formatted argument works identically
        # under Python 2 and 3 and reproduces the original output.
        print('%s \tquery:  %s' % (id, ' '.join(self.dataset[id]['terms'])))
        print('%s %s' % (tQ, eURI))
        print('N(relevant)= %s' % len(relevant_data))

    def print_query(self, id, gpc, sim, th):
        """Expand the types for query *id*, run one query and print the raw result."""
        tQ, eURI = self.queries[id]
        # BUG FIX: was `self.typeExpansion`, which __init__ never sets --
        # every call raised AttributeError. Assuming SynsetExpansion provides
        # expandType (TODO confirm against SynsetExpansion's definition).
        tURIs = self.synsetExpansion.expandType(tQ, sim, th)
        print("Number(types)= %s" % len(tURIs))
        query = self.engine.query(gpc, tURIs, eURI)
        result = self.engine.sparql.request_execution(query)
        print(result)

    def experiment(self, id, gpcs, sim, th):
        """Run query *id* with the given configuration and print its scores."""
        tQ, eURI = self.queries[id]
        relevant_data = self.relevants[id]
        # BUG FIX: was `self.typeExpansion` (never set) -- see print_query.
        tURIs = self.synsetExpansion.expandType(tQ, sim, th)
        result = self.engine.run(gpcs, tURIs, eURI)
        print(Experiment.measure(relevant_data, result))

    def analysis(self):
        """Average the stored "(recall, precision, f)" result lines per configuration."""
        # Each stored line looks like "(r, p, f)"; parse into [r, p, f] floats.
        # A list comprehension replaces map(float, ...) so the parsed rows are
        # subscriptable under Python 3 as well.
        data = [[float(x) for x in line.lstrip('(').rstrip(')').split(',')]
                for line in self.results]
        # Group the flat list into runs of 20 configurations each.
        data = [data[i:i + 20] for i in range(0, len(data), 20)]
        # Accumulate per-configuration sums across all runs.
        sum_data = data[0]
        for run in data[1:]:
            for i in range(20):
                sum_data[i][0] += run[i][0]
                sum_data[i][1] += run[i][1]
                sum_data[i][2] += run[i][2]
        for i in range(20):
            # NOTE(review): 22.0 is presumably the number of runs (queries)
            # being averaged over -- confirm it equals len(data) for this
            # benchmark before generalizing.
            print('%s ***************' % (i + 1))
            print(sum_data[i][0] / 22.0)
            print(sum_data[i][1] / 22.0)
            print('***************')