def on_post(self, req, resp):
    """Wrap a posted SVG fragment in a styled <svg> element and store it.

    Query params: ``id`` (file name stem), ``w``/``h`` (canvas size,
    defaulting to 1000x400).  The request body is the inner SVG markup;
    it is combined with the inlined react-vis stylesheet and written to
    ``<storage_path>/<id>.svg`` so the export renders standalone.
    """
    main_logger.info('POST image')
    image_id = req.get_param('id', None)
    # NOTE(review): the original passed the default as get_param()'s second
    # positional argument, which is `required` in falcon, so a missing 'w'
    # would have raised instead of defaulting -- use an explicit fallback.
    width = req.get_param('w') or 1000
    height = req.get_param('h') or 400
    # Raw request body is the inner SVG markup (assumes a Python 2 str;
    # under Python 3 this would be bytes and need decoding -- TODO confirm).
    svg = req.stream.read()
    # Inlined react-vis CSS so the exported SVG is self-contained.
    template = """ <style type="text/css"> <![CDATA[ .rv-treemap{font-size:12px;position:relative}.rv-treemap__leaf{overflow:hidden;position:absolute}.rv-treemap__leaf--circle{align-items:center;border-radius:100%;display:flex;justify-content:center}.rv-treemap__leaf__content{overflow:hidden;padding:10px;text-overflow:ellipsis}.rv-xy-plot{color:#c3c3c3;position:relative}.rv-xy-plot canvas{pointer-events:none}.rv-xy-plot .rv-xy-canvas{pointer-events:none;position:absolute}.rv-xy-plot__inner{display:block}.rv-xy-plot__axis__line{fill:none;stroke-width:2px;stroke:#e6e6e9}.rv-xy-plot__axis__tick__line{stroke:#e6e6e9}.rv-xy-plot__axis__tick__text{fill:#6b6b76;font-size:11px}.rv-xy-plot__axis__title text{fill:#6b6b76;font-size:11px}.rv-xy-plot__grid-lines__line{stroke:#e6e6e9}.rv-xy-plot__circular-grid-lines__line{fill-opacity:0;stroke:#e6e6e9}.rv-xy-plot__series,.rv-xy-plot__series path{pointer-events:all}.rv-xy-plot__circular-grid-lines__line{fill-opacity:0;stroke:#e6e6e9}.rv-xy-plot__series,.rv-xy-plot__series path{pointer-events:all}.rv-xy-plot__series--line{fill:none;stroke:#000;stroke-width:2px}.rv-crosshair{position:absolute;font-size:11px;pointer-events:none}.rv-crosshair__line{background:#47d3d9;width:1px}.rv-crosshair__inner{position:absolute;text-align:left;top:0}.rv-crosshair__inner__content{border-radius:4px;background:#3a3a48;color:#fff;font-size:12px;padding:7px 10px;box-shadow:0 2px 4px rgba(0,0,0,0.5)}.rv-crosshair__inner--left{right:4px}.rv-crosshair__inner--right{left:4px}.rv-crosshair__title{font-weight:bold;white-space:nowrap}.rv-crosshair__item{white-space:nowrap}.rv-hint{position:absolute;pointer-events:none}.rv-hint__content{border-radius:4px;padding:7px 10px;font-size:12px;background:#3a3a48;box-shadow:0 2px 4px rgba(0,0,0,0.5);color:#fff;text-align:left;white-space:nowrap}.rv-discrete-color-legend{box-sizing:border-box;overflow-y:auto;font-size:12px}.rv-discrete-color-legend.horizontal{white-space:nowrap}.rv-discrete-color-legend-item{color:#3a3a48;border-radius:1px;padding:9px 10px}.rv-discrete-color-legend-item.horizontal{display:inline-block}.rv-discrete-color-legend-item.horizontal .rv-discrete-color-legend-item__title{margin-left:0;display:block}.rv-discrete-color-legend-item__color{background:#dcdcdc;display:inline-block;height:2px;vertical-align:middle;width:14px}.rv-discrete-color-legend-item__title{margin-left:10px}.rv-discrete-color-legend-item.disabled{color:#b8b8b8}.rv-discrete-color-legend-item.clickable{cursor:pointer}.rv-discrete-color-legend-item.clickable:hover{background:#f9f9f9}.rv-search-wrapper{display:flex;flex-direction:column}.rv-search-wrapper__form{flex:0}.rv-search-wrapper__form__input{width:100%;color:#a6a6a5;border:1px solid #e5e5e4;padding:7px 10px;font-size:12px;box-sizing:border-box;border-radius:2px;margin:0 0 9px;outline:0}.rv-search-wrapper__contents{flex:1;overflow:auto}.rv-continuous-color-legend{font-size:12px}.rv-continuous-color-legend .rv-gradient{height:4px;border-radius:2px;margin-bottom:5px}.rv-continuous-size-legend{font-size:12px}.rv-continuous-size-legend .rv-bubbles{text-align:justify;overflow:hidden;margin-bottom:5px;width:100%}.rv-continuous-size-legend .rv-bubble{background:#d8d9dc;display:inline-block;vertical-align:bottom}.rv-continuous-size-legend .rv-spacer{display:inline-block;font-size:0;line-height:0;width:100%}.rv-legend-titles{height:16px;position:relative}.rv-legend-titles__left,.rv-legend-titles__right,.rv-legend-titles__center{position:absolute;white-space:nowrap;overflow:hidden}.rv-legend-titles__center{display:block;text-align:center;width:100%}.rv-legend-titles__right{right:0}.rv-radial-chart .rv-xy-plot__series--label{pointer-events:none} ]]> </style> """
    result = ('<svg class="rv-xy-plot__inner" xmlns="http://www.w3.org/2000/svg" width="'
              + str(width) + '" height="' + str(height) + '">'
              + template + svg + '</svg>')
    path = os.path.join(self._storage_path, str(image_id) + '.svg')
    # Context manager guarantees the handle is closed even if write() fails.
    with open(path, 'wb') as file_out:
        file_out.write(result)
    resp.content_type = 'application/json'
    resp.status = falcon.HTTP_200
    resp.data = json.dumps({'status': 'OK'})
def on_post(self, req, resp):
    """Materialise triples inferred via owl:sameAs into the 'ginf' graph."""
    main_logger.info("POST /infer")
    # Pull every statement whose object carries an owl:sameAs alias.
    construct_query = """ CONSTRUCT { ?s1 ?p1 ?o1 } WHERE { ?o1 <http://www.w3.org/2002/07/owl#sameAs> ?o2 . ?s1 ?p1 ?o2 . ?o2 ?p2 ?o3 . } """
    self.dm.export('gmap')
    self.dm.export('ggroup')
    matches = self.dm.query(construct_query, ['gmap', 'ggen', 'ggroup'])
    inferred = Graph()
    for triple in matches:
        predicate = str(triple[1])
        if "gen_" not in predicate:
            # No generated marker -- copy the triple through unchanged.
            inferred.add(triple)
            continue
        # Drop the 'gen_' marker from the predicate IRI before storing.
        pieces = predicate.split('gen_')
        inferred.add((triple[0], URIRef(pieces[0] + pieces[1]), triple[2]))
    inferred.serialize("ginf.ttl", format="turtle")
    self.dm.add_graph(inferred, 'ginf')
    resp.status = falcon.HTTP_200
    resp.content_type = 'application/json'
    resp.data = json.dumps({"status": "OK"})
def on_delete(self, req, resp):
    """Clear the 'excluded' flag of one observation from the 'gexc' graph."""
    obs = req.get_param('obs')
    main_logger.info('ExcludedObservations DELETE' + (obs or 'empty'))
    # Wildcard object: remove every lada:excluded triple for this observation.
    self.dm.remove((URIRef(obs), ns_lada['excluded'], None), ['gexc'])
    resp.content_type = 'application/json'
    resp.status = falcon.HTTP_200
    resp.data = json.dumps({"status": "OK"}, indent=1, sort_keys=True)
def on_get(self, req, resp):
    """Return the data rows of one worksheet as a JSON array of row arrays.

    Query params: ``uri`` (publication resource whose lada:filePath points
    at the workbook) and ``sheet`` (worksheet name).  The header row is
    skipped; a cell holding one of the metadata markers ends the data part
    of its row; empty cells become ''; all-empty rows are dropped.
    """
    uri = req.get_param('uri')
    sheet_name = req.get_param('sheet')
    file_path = self.dm.value(URIRef(uri), ns_lada['filePath'], 'gpubs')
    main_logger.info(file_path)
    # data_only=True: read computed cell values, not formulas.
    wb = load_workbook(filename=file_path, data_only=True)
    sheet = wb[sheet_name]
    # Marker cells that terminate the data portion of a row.
    markers = ('file:', 'comment:', 'label:')
    data = []
    is_first = True
    for row in sheet.iter_rows():
        if is_first:
            # Skip the header row.
            is_first = False
            continue
        if not row:
            continue
        row_data = []
        for cell in row:
            if cell.value is not None and cell.value in markers:
                break
            row_data.append('' if cell.value is None else cell.value)
        if row_data:
            data.append(row_data)
    resp.status = falcon.HTTP_200
    resp.content_type = 'application/json'
    resp.data = json.dumps(data)
def on_delete(self, req, resp):
    """Empty the working graph that holds the current filter results."""
    main_logger.info('DELETE filtered')
    # Drop every triple from the filtered-observations graph.
    self.dm.clear('gfiltered')
    resp.content_type = 'application/json'
    resp.status = falcon.HTTP_200
    resp.data = json.dumps({"status": "OK"})
def __init__(self, graph_storage, cc_storage, file_storage, lcdURL, lcdPort):
    """Initialise the data manager.

    graph_storage -- backing store for the main named graphs
    cc_storage    -- separate store for corpus-composition data
    file_storage  -- folder for uploaded files
    lcdURL/lcdPort -- location of the LCD service (stored, not contacted here)

    On construction this eagerly loads every corpus-composition Turtle file
    and builds the 'glcd' graph from the local corpus-mapping JSON.
    """
    self.storage = graph_storage
    self.cc_storage = cc_storage
    self.file_storage = file_storage
    self.lcdURL = lcdURL
    self.lcdPort = lcdPort
    # TODO: get this from properties
    self.cc_inputFolder = 'data/corpus-composition/'
    inputFolder = self.cc_inputFolder
    # load corpus compositions: every file in the folder is parsed as Turtle
    # into the corpus-composition store (no named graph).
    for infile in os.listdir(inputFolder):
        main_logger.info('Adding ' + infile + ' to the corpus composition graph')
        self.cc_storage.add_file(inputFolder + infile, "turtle", None)
    # calculate gcc triples
    main_logger.info("Number of triples in GCC:" + str(self.cc_storage.count_triples()))
    # process corpus mapping - this should come from LCD
    with open('data/corpus-mapping.json') as data_file:
        data = json.load(data_file)
    g = Graph()
    # The mapping file is abbreviation -> corpus URI; each entry becomes a
    # typed Corpus resource labelled with its abbreviation.
    for abbr in data:
        uri = data[abbr]
        corpusTypeURI = URIRef('http://h224.it.helsinki.fi:8080/varieng/types/Corpus')
        g.add((URIRef(uri), RDF.type, corpusTypeURI))
        g.add((URIRef(uri), RDFS.label, Literal(abbr)))
    self.storage.add_graph(g, 'glcd')
    self.storage.export('glcd')
def store_rdf(self, inputFolder):
    """Load every Turtle file in *inputFolder* into one named graph.

    Returns the URI of the graph (derived from the folder name).
    """
    graph_uri = URIRef(':' + inputFolder)
    for entry in os.listdir(inputFolder):
        main_logger.info('File:' + str(entry))
        self.dm.add_file(inputFolder + '/' + entry, 'turtle', graph_uri)
    return graph_uri
def on_get(self, req, resp):
    """Stream a previously stored SVG back to the client as a file download."""
    main_logger.info('GET image')
    image_id = req.get_param('id', None)
    path = os.path.join(self._storage_path, str(image_id) + '.svg')
    resp.set_header('Content-Type', 'application/x-download')
    resp.set_header('Content-Disposition',
                    'attachment;filename=' + str(image_id) + '.svg')
    # Binary mode: the WSGI layer streams raw bytes, and text mode would
    # apply newline translation on some platforms (the original used 'r').
    # falcon closes the stream once the response has been sent.
    resp.stream = open(path, 'rb')
def on_post(self, req, resp):
    """For each corpus in the posted JSON, list its corpus compositions
    and (when a composition has a Genre dimension) the genre code list.

    The response is keyed by pubURI + sheet so the client can attach the
    options to the right spreadsheet view.
    """
    pubURI = req.get_param('pubURI')
    sheet = req.get_param('sheet')
    raw_json = req.stream.read()
    obj = json.loads(raw_json.decode('utf-8'))
    data = []
    for corpus in obj['corpora']:
        corpusURI = corpus['uri']
        corpusLabel = corpus['label']
        # Compositions of this corpus, plus an optional Genre dimension.
        queryStr = """ select distinct ?cc ?label ?dim { ?cc a <http://data.hulib.helsinki.fi/ns/qb4cc#CorpusComposition> . ?cc <http://data.hulib.helsinki.fi/ns/qb4cc#corpus> <%s> . ?cc <http://purl.org/dc/terms/title> ?label . OPTIONAL { ?cc <http://purl.org/linked-data/cube#structure> ?s . ?s <http://data.hulib.helsinki.fi/ns/qb4cc#dimension> ?d . ?d <http://purl.org/linked-data/cube#dimension> ?dim . ?dim a <http://data.hulib.helsinki.fi/ns/qb4cc#Genre> . } } """ % (corpusURI)
        pubData = []
        qres = self.dm.query_cc(queryStr)
        for row in qres:
            genres = []
            if (row['dim']):
                main_logger.info(row['dim'])
                # Resolve the genre code list attached to the dimension.
                q2 = "select distinct ?uri ?label { <%s> <http://purl.org/linked-data/cube#codeList> ?list . ?uri <http://www.w3.org/2004/02/skos/core#inScheme> ?list . ?uri <http://www.w3.org/2004/02/skos/core#prefLabel> ?label } order by ?label" % (row['dim'])
                main_logger.info(q2)
                # NOTE(review): 'gcc' is a module-level graph here, unlike the
                # composition query above which goes through self.dm.query_cc --
                # confirm this is intentional and that 'gcc' is always defined.
                qres2 = gcc.query(q2)
                for row2 in qres2:
                    genres.append({'uri': row2['uri'], 'label': row2['label']})
            pubData.append({'uri': row['cc'], 'label': row['label'], 'genres': genres})
        data.append({'uri': corpusURI, 'label': corpusLabel, 'options': pubData, 'selected': ''})
    resp.status = falcon.HTTP_200
    resp.content_type = 'application/json'
    resp.data = json.dumps({pubURI + sheet: data})
def create():
    """Create the API endpoints."""
    # Allow any origin/method/header -- the UI is served from another host.
    cors = CORS(allow_all_origins=True, allow_all_methods=True, allow_all_headers=True)
    app = falcon.API(middleware=[cors.middleware, MultipartMiddleware()])
    # One shared DataManager instance backs every resource.
    dm = DataManager(InMemoryStorage(), InMemoryStorage(), data_folder, lcdURL, lcdPort)
    # File upload / publication resources.
    app.add_route('/annotatedFiles', AnnotatedFiles(dm))
    app.add_route('/annotatedFiles/json', AnnotatedFilesJSON(dm))
    app.add_route('/publications', GetPublications(dm))
    # Cube dimension values.
    app.add_route('/expression', CubeExpressions(dm))
    app.add_route('/corpus', CubeCorpora(dm))
    app.add_route('/genre', CubeGenres(dm))
    app.add_route('/function', CubeFunctions(dm))
    # Grouping of dimension values.
    app.add_route('/corpus/groups', GroupCorpora(dm))
    app.add_route('/expression/groups', GroupExpressions(dm))
    app.add_route('/function/groups', GroupFunctions(dm))
    app.add_route('/genre/groups', GroupGenres(dm))
    app.add_route('/groups', Groups(dm))
    # Observation filtering / normalisation pipeline.
    app.add_route('/obs/filtered', FilteredObservations(dm))
    app.add_route('/obs/filtered/query', QueryFilteredObservations(dm))
    app.add_route('/obs/filtered/result', FilteredResultObservations(dm))
    app.add_route('/obs/filtered/preview', FilteredObservationsPreview(dm))
    app.add_route('/infer', Infer(dm))
    app.add_route('/cc/filtered', CCFiltered(dm))
    app.add_route('/normalize', Normalize(dm))
    app.add_route('/obs/norm2', CreateNormalizedCube(dm))
    app.add_route('/obs/norm/query', QueryNormalizedCube(dm))
    app.add_route('/obs/norm/defs', NormalizedCubeDefinitions(dm))
    # Exclusion flags and auxiliary endpoints.
    app.add_route('/obs/excluded', ExcludedObservations(dm))
    app.add_route('/pub/excluded', ExcludedPublications(dm))
    app.add_route('/image', Image(image_folder))
    app.add_route('/lcd/status', CheckForLCDConnection(dm))
    main_logger.info('App2 is running.')
    return app
def on_post(self, req, resp):
    """Ingest uploaded spreadsheets: save each file, convert it to RDF,
    load it into a named graph, normalise dimension literals, and record
    publication metadata.  Failures are reported per file, not raised.
    """
    data = {'files': []}
    filenames = req.params
    for filename in filenames:
        # Pre-initialise so the error handler can always reference these,
        # even when the failure happens before they are assigned (the
        # original could raise NameError inside its own except block).
        path = None
        graphURI = None
        try:
            main_logger.info(filename)
            upload = req.get_param(filename)
            path = self.save_file(upload)
            rdfPath = self.generate_rdf(path)
            graphURI = self.store_rdf(rdfPath)
            # Turn free-text dimension cells into resources.
            self.transformLiterals(graphURI, 'data:Expression', 'expression')
            self.transformLiterals(graphURI, 'data:Function', 'function')
            self.transformLiterals(graphURI, 'data:Genre', 'genre')
            self.transformPeriods(graphURI, 'timeperiod')
            self.transformResources(graphURI, 'data:Corpus', 'corpus')
            self.dm.export('ggen')
            pubMetadata = self.get_publication_metadata(graphURI)
            self.store_publication_metadata(pubMetadata, path, rdfPath)
            data['files'].append({
                'filename': filename,
                'path': path,
                'uri': str(graphURI),
                'status': 'OK'
            })
        except Exception:
            # Best-effort per file: log the traceback and report the error
            # in the response instead of failing the whole upload.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type, exc_value, exc_traceback,
                                      limit=5, file=sys.stdout)
            data['files'].append({
                'filename': filename,
                'path': path,
                'uri': str(graphURI),
                'status': 'ERROR',
                'message': 'change me'
            })
    resp.status = falcon.HTTP_200
    resp.content_type = 'application/json'
    resp.data = json.dumps(data)
def on_delete(self, req, resp):
    """Remove a publication's metadata triples and its cube graph."""
    main_logger.info('Delete: annotatedFiles')
    uri = req.get_param('uri')
    folder = req.get_param('file')
    # Drop every triple whose subject is the publication resource.
    # (The RDF folder and the annotated file itself stay on disk.)
    self.dm.remove((URIRef(uri), None, None), None)
    # The cube data lives in a graph named after the upload folder.
    cube_graph = URIRef(':' + folder)
    main_logger.info(cube_graph)
    self.dm.clear(cube_graph)
    resp.content_type = 'application/json'
    resp.status = falcon.HTTP_200
def on_get(self, req, resp):
    """Compute one normalized frequency value via the instance's query
    template and return it keyed by publication+sheet and cluster key."""
    base = req.get_param_as_int('base', 0)
    cc_uri = req.get_param('ccURI', None)
    start_year = req.get_param('startYear', None)
    end_year = req.get_param('endYear', None)
    abs_value = req.get_param_as_int('absValue', 0)
    obs = req.get_param('obs', None)
    pub = req.get_param('pub', None)
    sheet = req.get_param('sheet', None)
    ckey = req.get_param('ckey', None)
    cc_genre = req.get_param('ccgenre', None)
    cc_genres = req.get_param_as_list('ccgenres', None)
    # Optional genre restriction clause spliced into the query template.
    genre_part = ''
    if cc_genres:
        wrapped = ['<' + g + '>' for g in cc_genres]
        genre_part = ('?part qb4cc:genre ?genre . FILTER (?genre IN('
                      + ','.join(wrapped) + ')) .')
    query = self.query_template.format(base, abs_value, cc_uri, start_year,
                                       end_year, genre_part)
    main_logger.info(query)
    rows = self.dm.query_cc(query)
    norm_value = -1  # sentinel when the query yields no rows
    for row in rows:
        main_logger.info(row)
        norm_value = row['normalizedValue']
    resp.status = falcon.HTTP_200
    resp.content_type = 'application/json'
    resp.data = json.dumps({pub + sheet: {ckey: norm_value}})
def on_post(self, req, resp):
    """Build the 'gresult' cube dataset from the posted cluster selections.

    The JSON body maps keys to cluster lists; for each cluster with a
    'selected' observation, one qb:Observation with its frequency is added.
    """
    main_logger.info('POST norm2')
    raw_json = req.stream.read()
    obj = json.loads(raw_json.decode('utf-8'))
    main_logger.info(obj)
    s = ns_lada['cube']
    dsd = ns_lada['structure']
    gresult = Graph()
    gresult.add((s, RDF.type, ns_cube['DataSet']))
    gresult.add((s, ns_cube['structure'], dsd))
    for key in obj:
        main_logger.info(key)
        clusters = obj[key]
        for o in clusters:
            if o['selected']:
                uri = URIRef(o['selected'])
                obs = o['values'][o['selected']]
                # find the one with the per = 1: prefer the per-1 variant's
                # observation URI over the selected one when present.
                for vkey in o['values']:
                    value = o['values'][vkey]
                    if value['per'] == 1:
                        uri = URIRef(value['obs'])
                gresult.add((uri, RDF.type, ns_cube['Observation']))
                # NOTE(review): the frequency comes from the *selected* entry
                # even when the URI was swapped to the per-1 variant -- confirm.
                gresult.add((uri, ns_lada['frequency'], Literal(float(obs['freq']))))
    gresult.serialize("gresult3.ttl", format="turtle")
    self.dm.add_graph(gresult, 'gresult')
    # NOTE(review): no resp.status/resp.data is set here; falcon's default
    # (200, empty body) is what clients receive -- confirm that is intended.
def on_post(self, req, resp):
    """Run the chart query described by the posted JSON and return series data.

    The body describes one or two dimensions plus slice filters.  With two
    dimensions the result is a list of series grouped by the second
    dimension; with one, a single series of summed values.  Rows are
    assumed to arrive ordered by the dimension labels (the query's
    ORDER BY), which the grouping below depends on.
    """
    raw_json = req.stream.read()
    test = None
    if len(raw_json) > 0:
        test = json.loads(raw_json.decode('utf-8'))
    main_logger.info(test)
    # SPARQL variables to select: one label variable per dimension.
    variables = []
    variables.append('?' + test['dimension']['id'] + '_label')
    # check for second dimension
    if 'dimension' in test['dimension']:
        variables.append('?' + test['dimension']['dimension']['id'] + '_label')
    # Build the WHERE fragments for the slice filters and dimensions.
    slicePart = ''
    for sc in test['slices']:
        if (sc['type'] == 'value'):
            slicePart = slicePart + self.create_value_filter(sc)
        else:
            slicePart = slicePart + self.create_filter(sc)
    dimensionPart = self.create_filter(test['dimension'])
    # check for second dimension
    if 'dimension' in test['dimension']:
        dimensionPart = dimensionPart + self.create_filter(test['dimension']['dimension'])
    query = """ select {0} ?obs ?value {{ ?obs a <http://purl.org/linked-data/cube#Observation> . ?obs <http://lada/frequency> ?value . {1} {2} }} order by {0} """.format(' '.join(variables), slicePart, dimensionPart)
    main_logger.info(query)
    results = []
    legendItems = []
    qres = self.dm.query(query, ['gresult', 'ggroup', 'ggen', 'ginf'])
    # array order: obs, dimension.2, dimension.1
    dim1Data = []
    dim2Data = []
    # dim1 label -> dim2 label -> summed value
    dim2Dict = {}
    if 'dimension' in test['dimension']:
        # Two-dimensional case: pivot rows into one series per dim2 value.
        previousDim1Value = None
        previousDim2Value = None
        if qres.__nonzero__():
            for row in qres:
                # NOTE(review): row.count on an rdflib result row looks like it
                # resolves to the tuple method, so this guard may always be
                # true -- confirm what it is meant to filter out.
                if str(row.count) != 'None':
                    # variables[i][1:] strips the leading '?' to index the row.
                    dim1Value = str(row[variables[0][1:]])
                    dim2Value = str(row[variables[1][1:]])
                    if previousDim2Value == None or previousDim2Value != dim2Value:
                        dim2Data.append(dim2Value)
                    if not dim2Dict.has_key(dim1Value):
                        dim2Dict[dim1Value] = {}
                    if not dim2Dict[dim1Value].has_key(dim2Value):
                        dim2Dict[dim1Value][dim2Value] = float(row['value'])
                    else:
                        # Same cell seen again: accumulate.
                        dim2Dict[dim1Value][dim2Value] = dim2Dict[dim1Value][dim2Value] + float(row['value'])
                    main_logger.info('added ' + row['value'] + " to " + dim1Value + ":" + dim2Value)
                    if previousDim1Value == None:
                        dim1Data.append(dim1Value)
                    elif previousDim1Value != dim1Value:
                        dim1Data.append(dim1Value)
                    previousDim1Value = dim1Value
                    previousDim2Value = dim2Value
            main_logger.info(dim1Data)
            main_logger.info(dim2Data)
            main_logger.info(dim2Dict)
            main_logger.info('step 2')
            # Emit one series per dim2 value; missing cells become y=None.
            for v2 in sorted(dim2Data):
                if not v2 in legendItems:
                    legendItems.append(v2)
                inner = []
                for v1 in sorted(dim2Dict):
                    obj = {}
                    obj['x'] = v1
                    if dim2Dict[v1].has_key(v2):
                        obj['y'] = dim2Dict[v1][v2]
                        obj['label'] = v2
                    else:
                        obj['y'] = None
                        obj['label'] = v2
                    inner.append(obj)
                results.append(inner)
            main_logger.info('done')
    else:
        # One-dimensional case: sum consecutive rows with the same label.
        previousDimValue = None
        sum = 0  # NOTE(review): shadows the builtin sum() within this branch
        if qres.__nonzero__():
            _results = []
            for row in qres:
                if str(row.count) != 'None':
                    dimValue = row[variables[0][1:]]
                    if previousDimValue == None or previousDimValue == dimValue:
                        sum = sum + float(row['value'])
                    else:
                        _results.append({'x': previousDimValue, 'y': sum})
                        sum = float(row['value'])
                    previousDimValue = dimValue
            # Flush the final run.
            _results.append({'x': previousDimValue, 'y': sum})
            results.append(_results)
    data = {}
    data['results'] = results
    data['legend'] = legendItems
    resp.data = json.dumps(data)
    resp.status = falcon.HTTP_200
    resp.content_type = 'application/json'
def on_post(self, req, resp):
    """Run the observation filter described by the posted JSON and return
    the matches grouped by publication and sheet.

    Each dimension (expression/corpus/genre/function) carries a tri-state
    'no<dim>' flag -- 1: dimension must be absent, 2: optional, 3: must be
    present -- or, when the flag is falsy, an explicit value list built
    from the non-group filter entries.
    """
    main_logger.info('POST query filtered')
    raw_json = req.stream.read()
    obj = json.loads(raw_json.decode('utf-8'))
    main_logger.info(obj)
    # Collect <uri> terms for each dimension, skipping group-type entries.
    expressions = obj['expression']
    expFilters = []
    for f in expressions:
        if (f['type'] != 'group'):
            expFilters.append('<' + f['uri'] + '>')
    corpora = obj['corpus']
    corpusFilters = []
    for f in corpora:
        if (f['type'] != 'group'):
            corpusFilters.append('<' + f['uri'] + '>')
    genres = obj['genre']
    genreFilters = []
    for f in genres:
        if (f['type'] != 'group'):
            genreFilters.append('<' + f['uri'] + '>')
    functions = obj['function']
    funcFilters = []
    for f in functions:
        if (f['type'] != 'group'):
            funcFilters.append('<' + f['uri'] + '>')
    # Base query: observation plus its publication/sheet/period context.
    query = """ select distinct ?obs ?pub ?title ?year ?authors ?pubExcluded ?ds ?excluded ?row ?col ?sheet ?comment ?freq ?per ?corpus ?corpus2 ?corpusName ?genre ?genreName ?exp ?expName ?func ?funcName ?period ?periodName {
        ?obs a <http://purl.org/linked-data/cube#Observation> .
        ?obs <http://lada/filtered> ?f .
        ?pub <http://lada/file> ?file .
        ?pub <http://purl.org/dc/terms/title> ?title .
        ?pub <http://purl.org/dc/terms/issued> ?year .
        ?pub <http://purl.org/dc/terms/creator> ?authors .
        #FILTER NOT EXISTS {
        OPTIONAL { ?pub <http://lada/excluded> ?pubExcluded } .
        ?ds <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/file> ?file .
        ?obs <http://lada/sheet> ?sheet .
        ?obs <http://lada/timeperiod> ?period .
        ?period <http://www.w3.org/2000/01/rdf-schema#label> ?periodName .
        ?obs <http://purl.org/linked-data/cube#dataSet> ?ds .
        OPTIONAL { ?ds <http://www.w3.org/2000/01/rdf-schema#comment> ?comment . } .
        ?obs <http://lada/row> ?row .
        ?obs <http://lada/col> ?col .
        ?obs <http://lada/sheet> ?sheet .
        ?obs <http://lada/timeperiod> ?period .
        ?period <http://www.w3.org/2000/01/rdf-schema#label> ?periodName .
        #FILTER NOT EXISTS {
        OPTIONAL { ?obs <http://lada/excluded> ?excluded . } .
        ?obs <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/data/frequency> ?freq .
        ?obs <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/data/per> ?per .
        #FILTER(?freq > 0) .
        """
    # noexpression: 1 = no value, 2 = any or no value 3 = some value, none = specific values
    if obj['noexpression']:
        if obj['noexpression'] == 1:
            query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_expression> ?exp } . "
        if obj['noexpression'] == 2:
            query = query + " OPTIONAL { ?obs <http://lada/gen_expression> ?exp . ?exp <http://www.w3.org/2000/01/rdf-schema#label> ?expName . }"
        if obj['noexpression'] == 3:
            query = query + " ?obs <http://lada/gen_expression> ?exp . ?exp <http://www.w3.org/2000/01/rdf-schema#label> ?expName ."
    else:
        if (len(expFilters) > 0):
            joined = ", ".join(expFilters)
            query = query + "?obs <http://lada/expression> ?exp . ?exp <http://www.w3.org/2000/01/rdf-schema#label> ?expName . FILTER(?exp IN(" + joined + ")) . "
    if obj['nocorpus']:
        if obj['nocorpus'] == 1:
            # no value
            query = query + " FILTER NOT EXISTS { ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus } . "
        if obj['nocorpus'] == 2:
            query = query + " OPTIONAL { ?obs <http://lada/corpus> ?corpus . ?corpus <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName . ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus2 } . "
        if obj['nocorpus'] == 3:
            query = query + " ?obs <http://lada/corpus> ?corpus . ?corpus <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName . ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus2 . "
    else:
        # some value
        query = query + """
            ?obs <http://lada/corpus> ?corpus .
            ?corpus <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName .
            # TODO: get rid of this filter!
            ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus2
            """
        if len(corpusFilters) > 0:
            # specific values
            joined = ", ".join(corpusFilters)
            query = query + " FILTER(?corpus IN(" + joined + ")) . "
    if obj['nofunction']:
        if obj['nofunction'] == 1:
            query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_function> ?func } . "
        if obj['nofunction'] == 2:
            query = query + "OPTIONAL { ?obs <http://lada/gen_function> ?func . ?func <http://www.w3.org/2000/01/rdf-schema#label> ?funcName . } "
        if obj['nofunction'] == 3:
            query = query + " ?obs <http://lada/gen_function> ?func . ?func <http://www.w3.org/2000/01/rdf-schema#label> ?funcName . "
    else:
        query = query + "?obs <http://lada/function> ?func . ?func <http://www.w3.org/2000/01/rdf-schema#label> ?funcName . "
        if (len(funcFilters) > 0):
            joined = ", ".join(funcFilters)
            query = query + "FILTER(?func IN(" + joined + ")) . "
    if obj['nogenre']:
        if obj['nogenre'] == 1:
            query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_genre> ?genre } . "
        if obj['nogenre'] == 2:
            query = query + "OPTIONAL { ?obs <http://lada/gen_genre> ?genre . ?genre <http://www.w3.org/2000/01/rdf-schema#label> ?genreName . } "
        if obj['nogenre'] == 3:
            query = query + " ?obs <http://lada/gen_genre> ?genre . ?genre <http://www.w3.org/2000/01/rdf-schema#label> ?genreName ."
    else:
        if (len(genreFilters) > 0):
            joined = ", ".join(genreFilters)
            query = query + "?obs <http://lada/genre> ?genre . ?genre <http://www.w3.org/2000/01/rdf-schema#label> ?genreName . FILTER(?genre IN(" + joined + ")) . "
    query = query + "} order by ?pub ?sheet ?row ?col "
    qres = self.dm.query_all(query)
    data = []
    pubs = {}
    sheets = {}
    # Group by publication -> sheet (ds)
    # pub
    # - sheets[]
    #   - obs[]
    # Group together observations that share common dimension values
    main_logger.info('GO')
    for row in qres:
        pub = row['pub']
        sheet = row['sheet']
        main_logger.info(sheet)
        if not pub in pubs:
            newPub = {
                'pub': row['pub'],
                'title': row['title'],
                'year': row['year'],
                'authors': row['authors'],
                'excluded': row['pubExcluded'],
                'sheets': []
            }
            data.append(newPub)
            pubs[pub] = newPub
        if (pub + sheet) not in sheets:
            newSheet = {'name': row['sheet'], 'desc': row['comment'], 'obs': []}
            pubs[pub]['sheets'].append(newSheet)
            sheets[pub + sheet] = newSheet
        sheets[pub + sheet]['obs'].append({
            'obs': row['obs'],
            'excluded': row['excluded'],
            'row': int(row['row']),
            'col': int(row['col']),
            'freq': row['freq'],
            'per': int(row['per']),
            'corpus2': row['corpus2'],
            'corpusName': row['corpusName'],
            'genre': row['genre'],
            'genreName': row['genreName'],
            'exp': row['exp'],
            'expName': row['expName'],
            'func': row['func'],
            'funcName': row['funcName'],
            'period': row['period'],
            'periodName': row['periodName']
        })
    resp.status = falcon.HTTP_200
    resp.content_type = 'application/json'
    resp.data = json.dumps(data)
def on_get(self, req, resp):
    """Describe the result cube: its measures and dimensions, each dimension
    expanded with its possible values (code-list concepts, time periods,
    literal values, or labelled resources depending on its range).
    """
    main_logger.info('GET cube definitions')
    graphURI = req.get_param('graphuri', default=None)
    measures = []
    dimensions = []
    attributes = []
    # query for measures
    measure_query = """ select ?measure ?label ?range { ?s a <http://purl.org/linked-data/cube#DataStructureDefinition> . ?s <http://purl.org/linked-data/cube#component> ?c . ?c <http://purl.org/linked-data/cube#measure> ?measure . ?c <http://www.w3.org/2000/01/rdf-schema#label> ?label . ?measure <http://www.w3.org/2000/01/rdf-schema#range> ?range } order by ?label """
    main_logger.info(measure_query)
    qres = self.dm.query(measure_query, ['gresult', 'ggroup', 'ggen'])
    if qres.__nonzero__():
        for row in qres:
            main_logger.info(row)
            measures.append({
                'uri': str(row['measure']),
                'label': str(row['label']),
                'range': str(row['range'])
            })
    # Dimensions with their range and optional code list.
    dimension_query = """ select distinct ?dim ?label ?range ?codelist { ?s a <http://purl.org/linked-data/cube#DataStructureDefinition> . ?s <http://purl.org/linked-data/cube#component> ?c . ?c <http://purl.org/linked-data/cube#dimension> ?dim . ?c <http://www.w3.org/2000/01/rdf-schema#label> ?label . ?dim <http://www.w3.org/2000/01/rdf-schema#range> ?range OPTIONAL { ?dim <http://purl.org/linked-data/cube#codeList> ?codelist } } order by ?label """
    main_logger.info(dimension_query)
    qres = self.dm.query(dimension_query, ['gresult', 'ggroup', 'ggen'])
    if qres.__nonzero__():
        main_logger.info('test')
        for row in qres:
            main_logger.info(row)
            dimRange = str(row['range'])
            prop = str(row['dim'])
            obj = {'uri': prop, 'label': str(row['label']), 'range': dimRange}
            main_logger.info('codelist: ' + str(row['codelist']))
            if row['codelist'] != None and dimRange == 'http://www.w3.org/2004/02/skos/core#Concept':
                # SKOS concept dimension: expand its code list.
                codelist = row['codelist']
                qres2 = self.dm.query(
                    """ select distinct ?uri ?label {{ ?uri <http://www.w3.org/2004/02/skos/core#inScheme> <{0}> . ?uri <http://www.w3.org/2004/02/skos/core#prefLabel> ?label }} order by ?label """.format(codelist),
                    ['gresult', 'ggroup', 'ggen'])
                concepts = []
                for row2 in qres2:
                    concepts.append({'uri': row2['uri'], 'label': row2['label']})
                obj['concepts'] = concepts
            elif row['codelist'] != None and dimRange == 'http://data.hulib.helsinki.fi/ns/qb4cc#TimePeriod':
                # Time-period dimension: list period labels.
                # NOTE(review): this query string is never .format()ed, so the
                # '{{'/'}}' escapes reach the engine as literal double braces --
                # confirm the braces should be single here.
                codelist = row['codelist']
                qres2 = self.dm.query(
                    """ select distinct ?uri ?label {{ ?uri <http://www.w3.org/2004/02/skos/core#inScheme> <http://lada/codelist/period> . ?uri <http://www.w3.org/2004/02/skos/core#prefLabel> ?label }} order by ?label """,
                    ['gresult'])
                values = []
                for row2 in qres2:
                    values.append(row2['label'])
                obj['values'] = values
            elif dimRange.startswith('http://www.w3.org/2001/XMLSchema#'):
                # literals
                qres2 = self.dm.query(
                    """ select distinct ?value {{ ?uri <{0}> ?value }} order by ?value """.format(prop),
                    ['gresult', 'ggroup', 'ggen'])
                values = []
                for row2 in qres2:
                    values.append(row2['value'])
                obj['values'] = values
            else:
                # object objects, should have rdfs:labels or preflabels
                qres2 = self.dm.query(
                    """ select distinct ?uri ?label {{ ?obj <{0}> ?uri . OPTIONAL {{ ?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label }} }} order by ?label """.format(prop),
                    ['gresult', 'ggroup', 'ggen'])
                objs = []
                for row2 in qres2:
                    objs.append({
                        'uri': row2['uri'],
                        # Fall back to the URI when no label exists.
                        'label': (row2['label'] if row2['label'] != None else row2['uri'])
                    })
                obj['concepts'] = objs
            dimensions.append(obj)
    main_logger.info('done')
    results = {}
    results['measures'] = measures
    results['dimensions'] = dimensions
    resp.data = json.dumps(results)
    resp.status = falcon.HTTP_200
    resp.content_type = 'application/json'
def on_post(self, req, resp):
    """Mark an observation as excluded by adding a lada:excluded triple
    to the 'gexc' graph."""
    main_logger.info("ExcludedObservations POST with " + (req.get_param('obs') or 'empty'))
    self.dm.add_triple(
        (URIRef(req.get_param('obs')), ns_lada['excluded'], Literal(1)),
        'gexc')
    # Mirror the DELETE handler's response so clients get an explicit JSON
    # acknowledgement (previously this returned an empty 200 body).
    resp.status = falcon.HTTP_200
    resp.content_type = 'application/json'
    resp.data = json.dumps({"status": "OK"})
def on_post(self, req, resp):
    """Rebuild the 'gresult' DataStructureDefinition from the posted filter
    selections: one frequency measure plus a component per active dimension
    (corpus/expression/function/genre/timeperiod), each with a SKOS code
    list built from the selected values.
    """
    main_logger.info('POST cube definitions')
    raw_json = req.stream.read()
    obj = json.loads(raw_json.decode('utf-8'))
    main_logger.info(obj)
    self.dm.clear('gresult')
    gresult = Graph()
    s = ns_lada['structure']
    gresult.add((s, RDF.type, ns_cube['DataStructureDefinition']))
    # only measure is frequency
    mnode = BNode()
    gresult.add((s, ns_cube['component'], mnode))
    gresult.add((mnode, RDFS.label, Literal('normalized frequency')))
    measure = ns_lada['measure#frequency']
    gresult.add((measure, RDF.type, ns_cube['Measure']))
    gresult.add((measure, RDFS.range, ns_xsd['decimal']))
    gresult.add((mnode, ns_cube['measure'], measure))
    # TODO: add normalization base to the measure
    if not obj['nocorpus'] or len(obj['corpus']) > 0:
        # Corpus dimension, with a code list when specific corpora were chosen.
        anode = BNode()
        corpusDim = ns_lada['corpus']
        gresult.add((s, ns_cube['component'], anode))
        gresult.add((anode, RDFS.label, Literal('Corpus')))
        gresult.add((anode, ns_cube['dimension'], corpusDim))
        gresult.add((corpusDim, RDF.type, ns_qb4cc['corpus']))
        gresult.add((corpusDim, RDFS.range, ns_lcd['Corpus']))
        if len(obj['corpus']) > 0:
            cList = ns_lada['codelist/corpus']
            gresult.add((ns_lada['corpus'], ns_cube['codeList'], cList))
            for c in obj['corpus']:
                gresult.add((URIRef(c['uri']), RDFS.label, Literal(c['label'])))
                gresult.add((URIRef(c['uri']), SKOS.prefLabel, Literal(c['label'])))
                gresult.add((URIRef(c['uri']), SKOS.inScheme, cList))
    if not obj['noexpression'] or len(obj['expression']) > 0:
        # Expression dimension (SKOS concept range).
        anode = BNode()
        dim = ns_lada['expression']
        gresult.add((s, ns_cube['component'], anode))
        gresult.add((anode, RDFS.label, Literal('Expression')))
        gresult.add((anode, ns_cube['dimension'], dim))
        gresult.add((dim, RDF.type, ns_qb4cc['expression']))
        gresult.add((dim, RDFS.range, ns_skos['Concept']))
        if len(obj['expression']) > 0:
            expList = ns_lada['codelist/exp']
            gresult.add((ns_lada['expression'], ns_cube['codeList'], expList))
            for c in obj['expression']:
                gresult.add((URIRef(c['uri']), RDFS.label, Literal(c['label'])))
                gresult.add((URIRef(c['uri']), SKOS.prefLabel, Literal(c['label'])))
                gresult.add((URIRef(c['uri']), SKOS.inScheme, expList))
    if not obj['nofunction'] or len(obj['function']) > 0:
        # Function dimension (SKOS concept range).
        anode = BNode()
        dim = ns_lada['function']
        gresult.add((s, ns_cube['component'], anode))
        gresult.add((anode, RDFS.label, Literal('Function')))
        gresult.add((anode, ns_cube['dimension'], dim))
        gresult.add((dim, RDF.type, ns_qb4cc['function']))
        gresult.add((dim, RDFS.range, ns_skos['Concept']))
        if len(obj['function']) > 0:
            expList = ns_lada['codelist/func']
            gresult.add((ns_lada['function'], ns_cube['codeList'], expList))
            for c in obj['function']:
                gresult.add((URIRef(c['uri']), RDFS.label, Literal(c['label'])))
                gresult.add((URIRef(c['uri']), SKOS.prefLabel, Literal(c['label'])))
                gresult.add((URIRef(c['uri']), SKOS.inScheme, expList))
    # NOTE(review): the trailing "or obj['nogenre'] != 1" makes this condition
    # true for almost every input, unlike the three dimensions above --
    # confirm whether that is intended.
    if not obj['nogenre'] or len(obj['genre']) > 0 or obj['nogenre'] != 1:
        # Genre dimension (SKOS concept range).
        anode = BNode()
        dim = ns_lada['genre']
        gresult.add((s, ns_cube['component'], anode))
        gresult.add((anode, RDFS.label, Literal('Genre')))
        gresult.add((anode, ns_cube['dimension'], dim))
        gresult.add((dim, RDF.type, ns_qb4cc['genre']))
        gresult.add((dim, RDFS.range, ns_skos['Concept']))
        if len(obj['genre']) > 0:
            expList = ns_lada['codelist/genre']
            gresult.add((ns_lada['genre'], ns_cube['codeList'], expList))
            for c in obj['genre']:
                gresult.add((URIRef(c['uri']), RDFS.label, Literal(c['label'])))
                gresult.add((URIRef(c['uri']), SKOS.prefLabel, Literal(c['label'])))
                gresult.add((URIRef(c['uri']), SKOS.inScheme, expList))
    #TODO: add other dimensiton types hre
    if len(obj['timeperiod']) > 0:
        # Time-period dimension with its code list.
        periodnode = BNode()
        periodDim = ns_lada['timeperiod']
        gresult.add((s, ns_cube['component'], periodnode))
        gresult.add((periodnode, RDFS.label, Literal('Time period')))
        gresult.add((periodnode, ns_cube['dimension'], periodDim))
        gresult.add((periodDim, RDF.type, ns_qb4cc['timePeriod']))
        gresult.add((periodDim, RDFS.range, ns_qb4cc['TimePeriod']))
        periodList = ns_lada['codelist/period']
        gresult.add((ns_lada['timeperiod'], ns_cube['codeList'], periodList))
        for c in obj['timeperiod']:
            gresult.add((URIRef(c['uri']), RDFS.label, Literal(c['label'])))
            gresult.add((URIRef(c['uri']), SKOS.prefLabel, Literal(c['label'])))
            gresult.add((URIRef(c['uri']), SKOS.inScheme, periodList))
    gresult.serialize("gresult.ttl", format="turtle")
    self.dm.add_graph(gresult, 'gresult')
    resp.status = falcon.HTTP_200
    resp.content_type = 'application/json'
    # Echo the posted selections back to the client.
    resp.data = json.dumps(obj)
def on_post(self, req, resp):
    """Preview endpoint: count matching datasets/observations per publication.

    Reads a JSON filter selection from the request body, builds a SPARQL
    SELECT over the observation cube, and responds with a JSON list of
    publications, each annotated with its dataset ('ds') and observation
    ('obs') counts.
    """
    main_logger.info('Preview')
    raw_json = req.stream.read()
    obj = json.loads(raw_json.decode('utf-8'))
    main_logger.info(obj)

    def _collect_filters(entries):
        # Flatten the value URIs of all non-group entries into SPARQL IRI
        # tokens. NOTE(review): these values are spliced verbatim into the
        # query string below -- injection-prone if the client is untrusted.
        return ['<' + fv + '>'
                for f in entries
                if f['type'] != 'group'
                for fv in f['values']]

    expFilters = _collect_filters(obj['expression'])
    corpusFilters = _collect_filters(obj['corpus'])
    genreFilters = _collect_filters(obj['genre'])
    funcFilters = _collect_filters(obj['function'])

    query = """
        select distinct ?title ?pub ?ds ?obs {
        ?obs a <http://purl.org/linked-data/cube#Observation> .
        ?obs <http://purl.org/linked-data/cube#dataSet> ?ds .
        ?ds <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/file> ?file .
        ?pub <http://lada/file> ?file .
        ?pub <http://purl.org/dc/terms/title> ?title .
        ?obs <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/data/frequency> ?freq .
        ?obs <http://lada/timeperiod> ?period .
        ?period <http://www.w3.org/2000/01/rdf-schema#label> ?periodName .
        #FILTER(?freq > 0) .
    """
    # noX flags: 1 = no value, 2 = any or no value, 3 = some value,
    # falsy = restrict to the specific URIs collected above. (Only the
    # corpus dimension handles the == 2 case in this handler.)
    if obj['noexpression']:
        if obj['noexpression'] == 1:
            query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_expression> ?exp } . "
        if obj['noexpression'] == 3:
            query = query + " ?obs <http://lada/gen_expression> ?exp . "
    elif expFilters:
        joined = ", ".join(expFilters)
        query = query + "?obs <http://lada/gen_expression> ?exp . FILTER(?exp IN(" + joined + ")) . "
    if obj['nocorpus']:
        if obj['nocorpus'] == 1:  # no value
            query = query + " FILTER NOT EXISTS { ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus } . "
        if obj['nocorpus'] == 2:
            query = query + " OPTIONAL { ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus . ?corpus <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName . } "
        if obj['nocorpus'] == 3:
            query = query + " ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus . "
    else:  # some value
        query = query + """
            ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus .
            ?corpus <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName .
        """
        if corpusFilters:  # specific values
            joined = ", ".join(corpusFilters)
            query = query + " FILTER(?corpus IN(" + joined + ")) . "
    if obj['nofunction']:
        if obj['nofunction'] == 1:
            query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_function> ?func } . "
        if obj['nofunction'] == 3:
            query = query + " ?obs <http://lada/gen_function> ?func . "
    elif funcFilters:
        joined = ", ".join(funcFilters)
        query = query + "?obs <http://lada/gen_function> ?func . FILTER(?func IN(" + joined + ")) . "
    if obj['nogenre']:
        if obj['nogenre'] == 1:
            query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_genre> ?genre } . "
        if obj['nogenre'] == 3:
            query = query + " ?obs <http://lada/gen_genre> ?genre . "
    elif genreFilters:
        joined = ", ".join(genreFilters)
        query = query + "?obs <http://lada/gen_genre> ?genre . FILTER(?genre IN(" + joined + ")) . "
    query = query + "} order by ?title ?ds ?obs"

    data = {}
    previousDs = None
    previousPub = None
    numOfDs = 0
    numOfObs = 0
    main_logger.info(query)
    qres = self.dm.query_all(query)
    # Rows arrive ordered by publication, then dataset, then observation;
    # count dataset/observation transitions per publication.
    for row in qres:
        pub = row['pub']
        ds = row['ds']
        if pub not in data:
            data[pub] = {'title': row['title'], 'uri': row['pub']}
        if previousDs is None and previousPub is None:
            # First row seeds the counters.
            main_logger.info('first row')
            data[pub]['obs'] = 1
            data[pub]['ds'] = 1
            previousDs = ds
            previousPub = pub
            continue
        if previousDs != ds:
            numOfDs = numOfDs + 1
            data[pub]['ds'] = numOfDs
            data[pub]['obs'] = numOfObs
        if previousPub != pub:
            # Publication boundary: close out the previous publication's totals.
            data[previousPub]['ds'] = numOfDs
            data[previousPub]['obs'] = numOfObs + 1
            numOfObs = 0
            numOfDs = 0
        numOfObs = numOfObs + 1
        previousDs = ds
        previousPub = pub
    if previousPub is not None:
        data[previousPub]['obs'] = numOfObs
    pubs = list(data.values())
    main_logger.info(data)
    resp.status = falcon.HTTP_200
    resp.content_type = 'application/json'
    # falcon's Response.data expects bytes, not str.
    resp.data = json.dumps(pubs).encode('utf-8')
def on_get(self, req, resp):
    """Return every currently filtered observation, grouped as JSON:
    publication -> sheets[] -> obs[].

    Observations are grouped first by publication, then by sheet (dataset),
    preserving the query's ?pub ?sheet ?row ?col order.
    """
    main_logger.info('GET filtered')
    query = """
        select distinct ?obs ?pub ?title ?year ?authors ?pubExcluded ?ds ?excluded ?row ?col ?sheet ?comment ?freq ?per ?corpus2 ?corpusName ?genre ?genreName ?exp ?expName ?func ?funcName ?period ?periodName {
        #graph <http://lada/graph/filtered> {
        ?obs a <http://purl.org/linked-data/cube#Observation> .
        ?obs <http://lada/filtered> ?f .
        #}
        ?pub <http://lada/file> ?file .
        ?pub <http://purl.org/dc/terms/title> ?title .
        ?pub <http://purl.org/dc/terms/issued> ?year .
        ?pub <http://purl.org/dc/terms/creator> ?authors .
        OPTIONAL { ?pub <http://lada/excluded> ?pubExcluded } .
        ?ds <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/file> ?file .
        ?obs <http://purl.org/linked-data/cube#dataSet> ?ds .
        OPTIONAL { ?ds <http://www.w3.org/2000/01/rdf-schema#comment> ?comment . } .
        ?obs <http://lada/row> ?row .
        ?obs <http://lada/col> ?col .
        ?obs <http://lada/sheet> ?sheet
        OPTIONAL {
            ?obs <http://lada/gen_expression> ?exp .
            ?exp <http://www.w3.org/2000/01/rdf-schema#label> ?expName .
        }
        OPTIONAL {
            ?obs <http://lada/gen_function> ?func .
            ?func <http://www.w3.org/2000/01/rdf-schema#label> ?funcName .
        }
        OPTIONAL {
            ?obs <http://lada/gen_genre> ?genre .
            ?genre <http://www.w3.org/2000/01/rdf-schema#label> ?genreName .
        }
        # OPTIONAL { ?obs <http://lada/timeperiod> ?period . ?period <http://www.w3.org/2000/01/rdf-schema#label> ?periodName .
        # }
        ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus2 .
        ?corpus2 <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName .
        OPTIONAL { ?obs <http://lada/excluded> ?excluded . } .
        ?obs <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/data/frequency> ?freq .
        ?obs <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/data/per> ?per .
        #FILTER(?freq > 0) .
        } order by ?pub ?sheet ?row ?col
    """
    main_logger.info(query)
    qres = self.dm.query_all(query)
    data = []    # ordered list of publication dicts (the response body)
    pubs = {}    # pub URI -> publication dict (same objects as in `data`)
    sheets = {}  # pub URI + sheet name -> sheet dict
    main_logger.info('GO')
    for row in qres:
        pub = row['pub']
        sheet = row['sheet']
        main_logger.info(sheet)
        if pub not in pubs:
            newPub = {
                'pub': row['pub'],
                'title': row['title'],
                'year': row['year'],
                'authors': row['authors'],
                'excluded': row['pubExcluded'],
                'sheets': []
            }
            data.append(newPub)
            pubs[pub] = newPub
        if (pub + sheet) not in sheets:
            newSheet = {
                'name': row['sheet'],
                'desc': row['comment'],
                'obs': []
            }
            pubs[pub]['sheets'].append(newSheet)
            sheets[pub + sheet] = newSheet
        sheets[pub + sheet]['obs'].append({
            'obs': row['obs'],
            'excluded': row['excluded'],
            'row': int(row['row']),
            'col': int(row['col']),
            'freq': row['freq'],
            'per': int(row['per']),
            'corpus2': row['corpus2'],
            'corpusName': row['corpusName'],
            'genre': row['genre'],
            'genreName': row['genreName'],
            'exp': row['exp'],
            'expName': row['expName'],
            'func': row['func'],
            'funcName': row['funcName'],
            'period': row['period'],
            'periodName': row['periodName']
        })
    resp.status = falcon.HTTP_200
    resp.content_type = 'application/json'
    # falcon's Response.data expects bytes, not str.
    resp.data = json.dumps(data).encode('utf-8')
def on_post(self, req, resp):
    """Return the filtered observations (frequency, per, and dimension
    values) matching the JSON filter selection in the request body."""
    main_logger.info('GET filtered result')
    raw_json = req.stream.read()
    obj = json.loads(raw_json.decode('utf-8'))
    main_logger.info(obj)

    def _collect_filters(entries):
        # SPARQL IRI tokens for all non-group entries.
        # NOTE(review): spliced verbatim into the query string below --
        # injection-prone if the client is untrusted.
        return ['<' + f['uri'] + '>' for f in entries if f['type'] != 'group']

    expFilters = _collect_filters(obj['expression'])
    corpusFilters = _collect_filters(obj['corpus'])
    genreFilters = _collect_filters(obj['genre'])
    funcFilters = _collect_filters(obj['function'])

    query = """
        select distinct ?obs ?freq ?per ?corpus ?corpusName ?genre ?genreName ?exp ?expName ?func ?funcName ?period ?periodName {
        ?obs a <http://purl.org/linked-data/cube#Observation> .
        ?obs <http://lada/filtered> ?f .
        ?pub <http://lada/file> ?file .
        ?pub <http://purl.org/dc/terms/title> ?title .
        ?pub <http://purl.org/dc/terms/issued> ?year .
        ?pub <http://purl.org/dc/terms/creator> ?authors .
        FILTER NOT EXISTS { ?pub <http://lada/excluded> ?pubExcluded } .
        ?ds <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/file> ?file .
        ?obs <http://purl.org/linked-data/cube#dataSet> ?ds .
        ?obs <http://lada/sheet> ?sheet .
        # OPTIONAL { ?obs <http://lada/timeperiod> ?period . ?period <http://www.w3.org/2000/01/rdf-schema#label> ?periodName .
        # }
        FILTER NOT EXISTS { ?obs <http://lada/excluded> ?excluded . } .
        ?obs <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/data/frequency> ?freq .
        ?obs <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/data/per> ?per .
        #FILTER(?freq > 0) .
    """
    # noX flags: 1 = no value, 2 = any or no value, 3 = some value,
    # falsy = restrict to the specific URIs collected above.
    if obj['noexpression']:
        if obj['noexpression'] == 1:
            query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_expression> ?exp } . "
        if obj['noexpression'] == 2:
            query = query + " OPTIONAL { ?obs <http://lada/gen_expression> ?exp . ?exp <http://www.w3.org/2000/01/rdf-schema#label> ?expName . }"
        if obj['noexpression'] == 3:
            query = query + " ?obs <http://lada/gen_expression> ?exp . ?exp <http://www.w3.org/2000/01/rdf-schema#label> ?expName ."
    elif expFilters:
        joined = ", ".join(expFilters)
        query = query + "?obs <http://lada/expression> ?exp . ?exp <http://www.w3.org/2000/01/rdf-schema#label> ?expName . FILTER(?exp IN(" + joined + ")) . "
    if obj['nocorpus']:
        if obj['nocorpus'] == 1:  # no value
            query = query + " FILTER NOT EXISTS { ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus } . "
        if obj['nocorpus'] == 2:
            query = query + " OPTIONAL { ?obs <http://lada/corpus> ?corpus . ?corpus <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName . FILTER NOT EXISTS { ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus } } . "
        if obj['nocorpus'] == 3:
            query = query + " ?obs <http://lada/corpus> ?corpus . ?corpus <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName . "
    else:  # some value
        query = query + """
            ?obs <http://lada/corpus> ?corpus .
            ?corpus <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName .
            # TODO: get rid of this filter!
            FILTER NOT EXISTS { ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus }
        """
        if corpusFilters:  # specific values
            joined = ", ".join(corpusFilters)
            query = query + " FILTER(?corpus IN(" + joined + ")) . "
    if obj['nofunction']:
        if obj['nofunction'] == 1:
            query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_function> ?func } . "
        if obj['nofunction'] == 2:
            query = query + "OPTIONAL { ?obs <http://lada/gen_function> ?func . ?func <http://www.w3.org/2000/01/rdf-schema#label> ?funcName . } "
        if obj['nofunction'] == 3:
            query = query + " ?obs <http://lada/gen_function> ?func . ?func <http://www.w3.org/2000/01/rdf-schema#label> ?funcName . "
    elif funcFilters:
        joined = ", ".join(funcFilters)
        query = query + "?obs <http://lada/function> ?func . ?func <http://www.w3.org/2000/01/rdf-schema#label> ?funcName . FILTER(?func IN(" + joined + ")) . "
    if obj['nogenre']:
        if obj['nogenre'] == 1:
            query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_genre> ?genre } . "
        if obj['nogenre'] == 2:
            query = query + "OPTIONAL { ?obs <http://lada/gen_genre> ?genre . ?genre <http://www.w3.org/2000/01/rdf-schema#label> ?genreName . } "
        if obj['nogenre'] == 3:
            query = query + " ?obs <http://lada/gen_genre> ?genre . ?genre <http://www.w3.org/2000/01/rdf-schema#label> ?genreName ."
    elif genreFilters:
        joined = ", ".join(genreFilters)
        query = query + "?obs <http://lada/genre> ?genre . ?genre <http://www.w3.org/2000/01/rdf-schema#label> ?genreName . FILTER(?genre IN(" + joined + ")) . "
    query = query + "} order by ?corpusName ?periodName ?expName ?genreName ?funcName "
    main_logger.info(query)
    qres = self.dm.query_all(query)
    data = [{
        'obs': row['obs'],
        'freq': row['freq'],
        'per': int(row['per']),
        'corpus': row['corpus'],
        'corpusName': row['corpusName'],
        'genre': row['genre'],
        'genreName': row['genreName'],
        'exp': row['exp'],
        'expName': row['expName'],
        'func': row['func'],
        'funcName': row['funcName'],
        'period': row['period'],
        'periodName': row['periodName']
    } for row in qres]
    resp.status = falcon.HTTP_200
    resp.content_type = 'application/json'
    # falcon's Response.data expects bytes, not str.
    resp.data = json.dumps(data).encode('utf-8')
def on_post(self, req, resp):
    """Materialize the current filter selection.

    Runs a SPARQL CONSTRUCT that tags every observation matching the JSON
    filter selection with <http://lada/filtered> 1, serializes the result
    to gfiltered.ttl, and stores it as the 'gfiltered' graph.
    """
    main_logger.info('POST filtered')
    raw_json = req.stream.read()
    obj = json.loads(raw_json.decode('utf-8'))
    main_logger.info(obj)

    def _collect_filters(entries):
        # SPARQL IRI tokens for all non-group entries.
        # NOTE(review): spliced verbatim into the query string below --
        # injection-prone if the client is untrusted.
        return ['<' + f['uri'] + '>' for f in entries if f['type'] != 'group']

    expFilters = _collect_filters(obj['expression'])
    corpusFilters = _collect_filters(obj['corpus'])
    genreFilters = _collect_filters(obj['genre'])
    funcFilters = _collect_filters(obj['function'])

    query = """
        construct {
        #?obs a <http://purl.org/linked-data/cube#Observation>
        ?obs <http://lada/filtered> 1
        } where {
    """
    # noX flags: 1 = no value, 2 = any or no value, 3 = some value,
    # falsy = restrict to the specific URIs collected above.
    if obj['noexpression']:
        if obj['noexpression'] == 1:
            query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_expression> ?exp } . "
        if obj['noexpression'] == 2:
            query = query + " ?obs a <http://purl.org/linked-data/cube#Observation> . "
        if obj['noexpression'] == 3:
            query = query + " ?obs <http://lada/gen_expression> ?exp . "
    elif expFilters:
        joined = ", ".join(expFilters)
        query = query + "?obs <http://lada/expression> ?exp . FILTER(?exp IN(" + joined + ")) . "
    # NOTE(review): unlike the other dimensions, nocorpus == 3 is not
    # handled in this handler -- confirm whether that is intentional.
    if obj['nocorpus']:
        if obj['nocorpus'] == 1:  # no value
            query = query + " FILTER NOT EXISTS { ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus } . "
        if obj['nocorpus'] == 2:
            query = query + " ?obs a <http://purl.org/linked-data/cube#Observation> . "
    else:  # some value
        query = query + """
            ?obs <http://lada/corpus> ?corpus .
            # TODO: get rid of this filter!
            FILTER NOT EXISTS { ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus }
        """
        if corpusFilters:  # specific values
            joined = ", ".join(corpusFilters)
            query = query + " FILTER(?corpus IN(" + joined + ")) . "
    if obj['nofunction']:
        if obj['nofunction'] == 1:
            query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_function> ?func } . "
        if obj['nofunction'] == 2:
            query = query + " ?obs a <http://purl.org/linked-data/cube#Observation> . "
        if obj['nofunction'] == 3:
            query = query + " ?obs <http://lada/gen_function> ?func . "
    elif funcFilters:
        joined = ", ".join(funcFilters)
        query = query + "?obs <http://lada/function> ?func . FILTER(?func IN(" + joined + ")) . "
    if obj['nogenre']:
        if obj['nogenre'] == 1:
            query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_genre> ?genre } . "
        if obj['nogenre'] == 2:
            query = query + " ?obs a <http://purl.org/linked-data/cube#Observation> . "
        if obj['nogenre'] == 3:
            query = query + " ?obs <http://lada/gen_genre> ?genre . "
    elif genreFilters:
        joined = ", ".join(genreFilters)
        query = query + "?obs <http://lada/genre> ?genre . FILTER(?genre IN(" + joined + ")) . "
    query = query + "}"
    main_logger.info(query)
    qres = self.dm.query_all(query)
    # Copy the constructed triples into a fresh graph and persist it.
    gfiltered = Graph()
    for triple in qres:
        gfiltered.add(triple)
    gfiltered.serialize("gfiltered.ttl", format="turtle")
    self.dm.add_graph(gfiltered, 'gfiltered')
    resp.status = falcon.HTTP_200
    resp.content_type = 'application/json'
    # falcon's Response.data expects bytes, not str.
    resp.data = json.dumps({'status': 'OK'}).encode('utf-8')