def GetQueryPrefixes(self, query):
    """Extract the PREFIX declarations that precede SELECT in a query.

    Newlines are replaced by spaces and tabs are removed before matching.
    Everything in front of the (last, because the match is greedy) SELECT
    keyword is cut at every '>' and each non-blank piece is treated as one
    declaration.

    Returns:
        A list of ``(name, declaration)`` tuples, where ``name`` is the text
        between the PREFIX keyword and the first ':' (leading spaces removed)
        and ``declaration`` is the piece with its closing '>' restored.
        ``None`` when the query has no SELECT keyword or when a piece does
        not start with a PREFIX keyword.
    """
    LogManager.LogInfo(f"Extracting prefixes from query...")
    flatQuery = query.replace('\n', ' ').replace('\t', '')
    selectPattern = r'(.*)[Ss][Ee][Ll][Ee][Cc][Tt](.*)'
    selectMatch = re.match(selectPattern, flatQuery)
    if not selectMatch:
        LogManager.LogInfo(f"No prefixes found")
        return None

    # Splitting on '>' drops the closing bracket of each IRI, so it is added
    # back; pieces that are empty once leading spaces are removed are skipped.
    declarations = [
        piece.lstrip(' ') + '>'
        for piece in selectMatch.group(1).split('>')
        if piece.lstrip(' ') != ''
    ]

    namePattern = '[Pp][Rr][Ee][Ff][Ii][Xx](.*?):'
    extracted = []
    for declaration in declarations:
        nameMatch = re.match(namePattern, declaration)
        if not nameMatch:
            LogManager.LogError("Invalid Prefixes")
            return None
        prefixName = nameMatch.group(1).lstrip(' ')
        extracted.append((prefixName, declaration))

    LogManager.LogInfo(
        f"Prefixes extracted successfully! Prefixes: {extracted}")
    return extracted
def GetQueryTriples(self, query):
    """Extract the triple patterns from the first ``{ ... }`` group of a query.

    Parenthesised expressions and standalone FILTER keywords are removed, the
    remaining text is tokenized with ``shlex.split`` and '.' separators are
    dropped. The tokens are then grouped three at a time.

    Returns:
        A list of ``(subject, predicate, object)`` tuples, or ``None`` when
        the text cannot be tokenized or the token count is not a multiple of
        three. In the failure case ``self.queryIsValid`` is set to ``False``
        and ``self.queryError`` holds the reason.
    """
    LogManager.LogInfo(f"Extracting triples from query...")
    inlineQuery = query.replace('\n', ' ').replace('\t', '')
    allTriples = inlineQuery[inlineQuery.find("{") + 1:inlineQuery.find("}")]
    allTriples = re.sub(r'\([^)]*\)', '', allTriples)
    # Only remove FILTER when it stands alone as a keyword; a plain substring
    # replacement would also mangle names such as dbo:filterType.
    allTriples = re.sub(r'(?<![\w?$:])filter(?!\w)', '', allTriples,
                        flags=re.IGNORECASE)

    try:
        tokens = shlex.split(allTriples)
    except ValueError as err:
        # shlex raises ValueError on e.g. an unterminated quoted literal.
        self.queryIsValid = False
        self.queryError = f"Invalid triples format on query: {err}"
        LogManager.LogInfo(self.queryError)
        return None

    nTriples = [token for token in tokens if token != '.']
    if len(nTriples) % 3 != 0:
        self.queryIsValid = False
        self.queryError = f"Invalid triples format on query: {nTriples}"
        LogManager.LogInfo(self.queryError)
        return None

    triples = [
        (nTriples[start], nTriples[start + 1], nTriples[start + 2])
        for start in range(0, len(nTriples), 3)
    ]
    LogManager.LogInfo(
        f"Triples extracted successfully! Triples: {triples}")
    return triples
def ValidateQuery(self, query):
    """Check that a query has the shape ``... SELECT ... WHERE { ... } ...``.

    All newlines, tabs and spaces are removed before matching, and the
    keywords are matched case-insensitively.

    Returns:
        ``True`` when the pattern matches from the start of the compacted
        query, ``False`` otherwise.
    """
    LogManager.LogInfo(f"Validating query: {query}")
    compactQuery = query.replace('\n', ' ').replace('\t', '').replace(' ', '')
    structurePattern = r'(.*)[Ss][Ee][Ll][Ee][Cc][Tt](.*)[Ww][Hh][Ee][Rr][Ee]{(.*)}(.*)'
    if re.match(structurePattern, compactQuery) is None:
        return False
    LogManager.LogInfo(f"Query structure is valid")
    return True
def GetQueryAnswers(self):
    """Collect the values bound to the first query variable.

    Reads ``self.serverAnswer["results"]["bindings"]`` and, for each binding,
    the ``"value"`` of ``self.queryVariable[0]``.

    Returns:
        A list of the bound values, or ``None`` when there are none.
    """
    LogManager.LogInfo(f"Getting query answer from endpoint...")
    bindings = self.serverAnswer["results"]["bindings"]
    answers = []
    if bindings:
        variable = self.queryVariable[0]
        # A SPARQL JSON result leaves a variable out of a binding when it is
        # unbound in that solution, so skip those rows instead of raising.
        answers = [
            binding[variable]["value"] for binding in bindings
            if variable in binding
        ]
    if not answers:
        LogManager.LogInfo(f"No answer from endpoint")
        return None
    return answers
def GetPredicateAnswer(self, variable, triples, answers, isVariablePosSubject):
    """Build the predicate wording for the triple whose object is the variable.

    The predicate of the first triple with ``?variable`` as object is reduced
    to its local name (last path segment of a full IRI, or the part after the
    prefix of a prefixed name), split into words and handed to
    ``Dict.CheckPredicates`` together with the grammatical flags.

    Args:
        variable: Query variable name without the leading '?'.
        triples: Sequence of ``(subject, predicate, object)`` tuples.
        answers: Answers for the variable; more than one selects the plural.
        isVariablePosSubject: Whether the variable is in subject position
            (not implemented yet, yields '').

    Returns:
        The result of ``Dict.CheckPredicates``, or '' when the variable is in
        subject position, no triple has it as object, or the predicate is
        ``None``.
    """
    LogManager.LogInfo(f"Getting main predicate for verbalizer...")
    var = '?' + variable
    isNounPlural = len(answers) > 1
    isVerbInfiniteForm = bool(len(answers) > 1 and isVariablePosSubject)
    isAccusative = True

    if isVariablePosSubject:
        # TODO Implement case when query variable is subject
        return ''

    answerTriple = [tri for tri in triples if var == tri[2]]
    if not answerTriple:
        # The variable is not the object of any triple: nothing to verbalize.
        return ''

    predicate = answerTriple[0][1]
    if predicate is None:
        return ''

    if 'http' in predicate:
        predicate = predicate.rsplit('/', 1)[-1].lstrip('<').rstrip('>')
    else:
        # Prefixed name: keep the part after the prefix. A predicate without
        # ':' (such as the keyword 'a') is used as it is.
        parts = predicate.strip('"').split(':')
        predicate = parts[1] if len(parts) > 1 else parts[0]

    if '_' in predicate:
        return Dict.CheckPredicates(predicate.split('_'),
                                    isVerbInfiniteForm, isNounPlural,
                                    isAccusative)

    # TODO Split predicate if it consists from more than 2 words
    splitPredicate = SplitWords.split(predicate)
    if splitPredicate == predicate:
        words = [predicate]
    else:
        words = [word.lower() for word in splitPredicate]
    return Dict.CheckPredicates(words, isVerbInfiniteForm, isNounPlural,
                                isAccusative)
def GetSubjectLabel(self, variable, triples, isVariablePosSubject):
    """Find a label for the subject of the triple whose object is the variable.

    Looks first for an ``rdfs:label`` triple on that subject; otherwise falls
    back to the last path segment of a full IRI or the local part of a
    prefixed name.

    Args:
        variable: Query variable name without the leading '?'.
        triples: Sequence of ``(subject, predicate, object)`` tuples.
        isVariablePosSubject: Whether the variable is in subject position
            (not implemented yet, yields '').

    Returns:
        The label string, or '' when none can be derived.
    """
    LogManager.LogInfo(f"Getting label for subject...")
    var = '?' + variable
    allQueryLabels = [
        'rdfs:label', '<http://www.w3.org/2000/01/rdf-schema#label>'
    ]

    if isVariablePosSubject:
        # TODO Implement case when query variable is subject
        return ''

    answerTriple = [tri for tri in triples if var == tri[2]]
    if not answerTriple:
        # The variable is not the object of any triple: no subject to label.
        return ''

    subj = answerTriple[0][0]
    subjLabel = [
        tri[2] for tri in triples
        if subj == tri[0] and tri[1] in allQueryLabels
    ]
    if subjLabel:
        if not subjLabel[0].startswith('?'):
            return subjLabel[0].strip('"').split('"')[0]
        # TODO label should be somewhere in query e.g. FILTER, use regex to extract it
        return ''
    if '<http' in subj:
        return subj.rsplit('/', 1)[-1].lstrip('<').rstrip('>')
    if ':' in subj:
        return subj.strip('"').split(':')[1]
    # TODO Send query to endpoint to get label
    return ''
def GetSubjectArtikel(self, type):
    """Return the subject type together with an article.

    An empty or missing type yields ''. Otherwise the type is passed through
    ``Dict.CheckType``; if the result contains 'der', 'die' or 'das' it is
    returned title-cased, else 'Der ' is put in front of the original type.
    """
    LogManager.LogInfo(f"Getting artikel for subject type...")
    if type in ('', None):
        return ''

    checkedType = Dict.CheckType(type)
    for article in ('der', 'die', 'das'):
        if article in checkedType:
            return checkedType.title()
    return 'Der ' + type
def init(endpoint="http://dbpedia.org/sparql"):
    """Store the endpoint URL and create the SPARQLWrapper client for it.

    Both are kept as attributes on ``SPARQLEndpointManager``. Any exception
    is logged and swallowed.
    """
    LogManager.LogInfo(f"Initializing SPARQLEndpointManager...")
    manager = SPARQLEndpointManager
    try:
        manager.endpoint = endpoint
        manager.sparql = SPARQLWrapper(endpoint)
    except Exception as err:
        LogManager.LogError(f"Failed to initialize SPARQLEndpointManager")
        LogManager.LogError(err)
def __init__(self, query):
    """Parse the query and store its verbalized answer in ``self.answer``.

    When the parser reports the query as invalid, ``self.answer`` holds the
    parser's error message instead.
    """
    LogManager.LogInfo(f"Starting Verbalize Manager...")
    self.answer = None
    self.parser = SPARQLParserManager(query)
    parsed = self.parser

    if not parsed.queryIsValid:
        LogManager.LogError(f"Invalid query syntax for query:\n{query}")
        self.answer = parsed.queryError
        return

    self.answer = self.Verbalize(
        parsed.queryVariable[0],
        parsed.queryTriples,
        parsed.queryAnswer,
        parsed.queryPrefixes,
    )
def SendQuery(query, returnFormat=JSON):
    """Run a query on the configured endpoint and return the converted result.

    Returns:
        Whatever ``query().convert()`` produces for ``returnFormat``, or
        ``None`` when any exception is raised (the exception is logged).
    """
    LogManager.LogInfo(
        f"Sending query to endpoint {SPARQLEndpointManager.endpoint}")
    try:
        client = SPARQLEndpointManager.sparql
        client.setQuery(query)
        client.setReturnFormat(returnFormat)
        return client.query().convert()
    except Exception as err:
        LogManager.LogError(
            f"Unable to Send query to {SPARQLEndpointManager.endpoint}")
        LogManager.LogError(err)
        return None
def GetSubjectType(self, variable, triples, prefixes, isVariablePosSubject):
    """Determine a type name for the subject of the variable's triple.

    Resolution order for the subject of the first triple whose object is
    ``?variable``:

    1. An ``rdf:type`` triple on the subject with a non-variable object: its
       local name is returned.
    2. The subject is an IRI or prefixed name: the endpoint is asked via
       ``SPARQLEndpointManager.SendQueryForLabel`` and its result is returned
       unchanged.
    3. The subject is a variable with a literal ``rdfs:label`` triple: the
       endpoint is asked via ``SPARQLEndpointManager.SendQueryForType``.

    Args:
        variable: Query variable name without the leading '?'.
        triples: Sequence of ``(subject, predicate, object)`` tuples.
        prefixes: Prefix tuples forwarded to the endpoint helpers.
        isVariablePosSubject: Whether the variable is in subject position
            (not implemented yet, yields '').

    Returns:
        The type string, or '' when it cannot be determined.
    """
    LogManager.LogInfo(f"Getting type of subject...")
    var = '?' + variable
    allRdfTypes = [
        'a', 'rdf:type', '<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>'
    ]
    allRdfLabels = [
        'rdfs:label', '<http://www.w3.org/2000/01/rdf-schema#label>'
    ]

    if isVariablePosSubject:
        # TODO Implement case when query variable is subject
        return ''

    answerTriple = [tri for tri in triples if var == tri[2]]
    if not answerTriple:
        # The variable is not the object of any triple: no subject to type.
        return ''

    subj = answerTriple[0][0]
    subjType = [
        tri[2] for tri in triples
        if subj == tri[0] and tri[1] in allRdfTypes
    ]
    if subjType and not subjType[0].startswith('?'):
        explicitType = subjType[0]
        if '<http' in explicitType:
            return explicitType.rsplit('/', 1)[-1].lstrip('<').rstrip('>')
        # Prefixed name: keep the part after the prefix; a value without ':'
        # is returned as it is instead of raising IndexError.
        parts = explicitType.strip('"').split(':')
        return parts[1] if len(parts) > 1 else parts[0]

    if 'http' in subj or ':' in subj:
        return SPARQLEndpointManager.SendQueryForLabel(subj, prefixes)

    if subj.startswith('?'):
        labelTriple = [
            tri for tri in triples
            if subj == tri[0] and tri[1] in allRdfLabels
            and not tri[2].startswith('?')
        ]
        if not labelTriple:
            return ''
        queryType = SPARQLEndpointManager.SendQueryForType(
            subj, labelTriple, prefixes)
        # The helper returns None when the request fails; test that before
        # doing any substring check on the result.
        if queryType is None:
            return ''
        return queryType.rsplit('/', 1)[-1] if 'http' in queryType else queryType

    return ''
def SendQueryForLabel(variable, prefixes='', returnFormat=JSON):
    """Ask the endpoint for the ``rdfs:label`` of a resource.

    Args:
        variable: Resource term inserted as the subject of the query.
        prefixes: Iterable of tuples whose second element is a full prefix
            declaration. ``''``, ``None`` or an empty list means no prefixes.
        returnFormat: Result format passed to the SPARQL client.

    Returns:
        The value of the first ``?label`` binding, '' when there are no
        bindings, or ``None`` when the request raises (the exception is
        logged).
    """
    LogManager.LogInfo(
        f"Sending query to endpoint {SPARQLEndpointManager.endpoint} for getting label of {variable}"
    )
    # Truthiness also covers None, which the prefix extraction can return.
    stringPrefixes = [pref[1] for pref in prefixes] if prefixes else ['']
    query = ' '.join(
        stringPrefixes
    ) + ' SELECT ?label WHERE { ' + variable + ' <http://www.w3.org/2000/01/rdf-schema#label> ' + '?label . }'
    try:
        SPARQLEndpointManager.sparql.setQuery(query)
        SPARQLEndpointManager.sparql.setReturnFormat(returnFormat)
        results = SPARQLEndpointManager.sparql.query().convert()
        bindings = results["results"]["bindings"]
        return bindings[0]["label"]["value"] if len(bindings) > 0 else ''
    except Exception as e:
        LogManager.LogError(
            f"Unable to Send query to {SPARQLEndpointManager.endpoint}")
        LogManager.LogError(e)
        return None
def ParseQuery(self, query):
    """Validate a query, run it and populate the parser's result attributes.

    Steps, each of which stops the parse on failure with
    ``self.queryIsValid`` set to ``False`` and ``self.queryError`` filled in:

    1. Structural validation. The query is only sent to the endpoint when
       this passes; otherwise ``self.serverAnswer`` is ``None``.
    2. Endpoint request. A ``None`` answer is reported as a syntax error.
    3. Prefix and triple extraction. A triple-format failure keeps the error
       message set by ``GetQueryTriples``.
    4. Exactly one result variable is required.
    5. At least one answer is required.
    """
    LogManager.LogInfo(f"Parsing SPARQL query...")
    self.queryIsValid = self.ValidateQuery(query)
    # Do not spend an endpoint round-trip on a query that already failed the
    # structural check.
    self.serverAnswer = (SPARQLEndpointManager.SendQuery(query)
                         if self.queryIsValid else None)
    if self.serverAnswer is None:
        self.queryIsValid = False
        self.queryError = f"Ungültige Abfragesyntax. Wir unterstützen nur Abfragen vom Typ 'SELECT'."
        return

    self.queryPrefixes = self.GetQueryPrefixes(query)
    self.queryTriples = self.GetQueryTriples(query)
    if not self.queryIsValid:
        # GetQueryTriples marked the query invalid and set queryError.
        return

    self.queryVariable = self.GetQueryVariables()
    if len(self.queryVariable) != 1:
        self.queryIsValid = False
        self.queryError = f"Sie können nur eine Variable abfragen. {self.queryVariable}\nAbfragen Antwort: {self.serverAnswer}"
        return

    self.queryAnswer = self.GetQueryAnswers()
    if self.queryAnswer is None:
        self.queryIsValid = False
        self.queryError = f"Die Abfrage hat keine Antwort."
def init(queriesNum=10):
    """Load the latest version ID and the sample queries from XML files.

    Reads ``VersionHistory.xml`` and stores the ``ID`` attribute of its last
    ``Version`` element in ``XMLManager.latestVersion``. Then stores
    ``queriesNum`` in ``XMLManager.sampleQueriesNum`` and reads
    ``queries.xml``, keeping the text of the first two children of every
    ``question`` element in ``XMLManager.queries`` and the first of each pair
    in ``XMLManager.labels``. An ``IOError`` in either step is logged and
    swallowed.
    """
    LogManager.LogInfo(
        f"Initializing XMLManager and getting latest version and queries..."
    )

    try:
        versionTree = ET.parse('VersionHistory.xml')
        versionNodes = list(versionTree.findall('Version'))
        newestNode = versionNodes[-1]
        XMLManager.latestVersion = newestNode.attrib['ID']
    except IOError:
        LogManager.LogError(f"Unable to get latest version from file")

    try:
        XMLManager.sampleQueriesNum = queriesNum
        queryTree = ET.parse('queries.xml')
        loadedQueries = []
        for questionNode in queryTree.findall('question'):
            loadedQueries.append((questionNode[0].text, questionNode[1].text))
        XMLManager.queries = loadedQueries
        XMLManager.labels = [pair[0] for pair in XMLManager.queries]
    except IOError:
        LogManager.LogError(f"Unable to get queries from file")
def SendQueryForType(variable, labelTriple, prefixes='', returnFormat=JSON):
    """Ask the endpoint for one ``rdf:type`` of a labelled variable.

    Args:
        variable: Term inserted as the subject of the type pattern.
        labelTriple: Non-empty sequence of triples; only the first is used.
            Its third element is wrapped in double quotes in the query.
        prefixes: Iterable of tuples whose second element is a full prefix
            declaration. ``''``, ``None`` or an empty list means no prefixes.
        returnFormat: Result format passed to the SPARQL client.

    Returns:
        The value of the first ``?type`` binding, '' when there are no
        bindings, or ``None`` when the request raises (the exception is
        logged).
    """
    LogManager.LogInfo(
        f"Sending query to endpoint {SPARQLEndpointManager.endpoint} for getting type of {variable}"
    )
    stringLabelTriple = f'{labelTriple[0][0]} {labelTriple[0][1]} "{labelTriple[0][2]}"'
    # Truthiness also covers None, which the prefix extraction can return.
    stringPrefixes = [pref[1] for pref in prefixes] if prefixes else ['']
    query = ' '.join(
        stringPrefixes
    ) + ' SELECT DISTINCT ?type WHERE { ' + variable + ' <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ' + '?type . ' + stringLabelTriple + ' } LIMIT 1'
    try:
        SPARQLEndpointManager.sparql.setQuery(query)
        SPARQLEndpointManager.sparql.setReturnFormat(returnFormat)
        results = SPARQLEndpointManager.sparql.query().convert()
        bindings = results["results"]["bindings"]
        return bindings[0]["type"]["value"] if len(bindings) > 0 else ''
    except Exception as e:
        LogManager.LogError(
            f"Unable to Send query to {SPARQLEndpointManager.endpoint}")
        LogManager.LogError(e)
        return None
def GetQueryVariables(self):
    """Return the ``vars`` entry from the ``head`` of the endpoint answer."""
    LogManager.LogInfo(
        f"Extracting query variable from endpoint answer...")
    answerHead = self.serverAnswer["head"]
    return answerHead["vars"]
from flask import Flask, render_template, request, get_template_attribute
from models.LogManager import LogManager
from models.XMLManager import XMLManager
from models.SPARQLEndpointManager import SPARQLEndpointManager
from models.VerbalizeManager import VerbalizeManager
import json

# Initialise the project managers at import time; the SPARQL endpoint is a
# service on localhost port 3030.
LogManager.init()
XMLManager.init()
SPARQLEndpointManager.init("http://127.0.0.1:3030/solide/sparql")
LogManager.LogInfo('Starting Flask Application...')
app = Flask(__name__)


@app.route('/')
def index():
    """Render index.html with labels and the version taken from XMLManager."""
    return render_template('index.html', labels=XMLManager.GetRandomLabels(), version=XMLManager.GetLatestVersion())


@app.route('/', methods=['POST'])
def PostRequests():
    """Handle a POST to '/' according to which form field is present.

    - 'query': verbalize the submitted SPARQL query and return the answer as
      a string.
    - 'label': return the result of ``XMLManager.GetSpecificQuery`` for the
      submitted label.
    - 'sample': fetch random labels.
    """
    if 'query' in request.form :
        sparqlQuery = request.form['query']
        verbilizer = VerbalizeManager(sparqlQuery)
        return str(verbilizer.answer)
    elif 'label' in request.form:
        label = request.form['label']
        queryLabel = XMLManager.GetSpecificQuery(label)
        return queryLabel
    elif 'sample' in request.form:
        # NOTE(review): no return is visible after this assignment, so this
        # branch (and a request with none of the three fields) returns None as
        # shown here. The rest of the branch may lie outside this view;
        # confirm against the full file.
        sampleQueries = XMLManager.GetRandomLabels()
def ConcatenateAnswer(self, subjWithArticle, subjLabel, predicate, answer):
    """Join the sentence parts with single spaces and end with a period.

    ``predicate`` and ``answer`` are converted with ``str``;
    ``subjWithArticle`` and ``subjLabel`` must already be strings.
    """
    LogManager.LogInfo(f"Constructing final verbalized answer...")
    sentenceParts = [subjWithArticle, subjLabel, str(predicate), str(answer)]
    return ' '.join(sentenceParts) + '.'