示例#1
0
    def on_post(self, req, resp):
        main_logger.info('POST image')
        id = req.get_param('id', None)
        width = req.get_param('w', 1000)
        height = req.get_param('h', 400)
        svg = req.stream.read()

        template = """
          <style type="text/css">
            <![CDATA[
            .rv-treemap{font-size:12px;position:relative}.rv-treemap__leaf{overflow:hidden;position:absolute}.rv-treemap__leaf--circle{align-items:center;border-radius:100%;display:flex;justify-content:center}.rv-treemap__leaf__content{overflow:hidden;padding:10px;text-overflow:ellipsis}.rv-xy-plot{color:#c3c3c3;position:relative}.rv-xy-plot canvas{pointer-events:none}.rv-xy-plot .rv-xy-canvas{pointer-events:none;position:absolute}.rv-xy-plot__inner{display:block}.rv-xy-plot__axis__line{fill:none;stroke-width:2px;stroke:#e6e6e9}.rv-xy-plot__axis__tick__line{stroke:#e6e6e9}.rv-xy-plot__axis__tick__text{fill:#6b6b76;font-size:11px}.rv-xy-plot__axis__title text{fill:#6b6b76;font-size:11px}.rv-xy-plot__grid-lines__line{stroke:#e6e6e9}.rv-xy-plot__circular-grid-lines__line{fill-opacity:0;stroke:#e6e6e9}.rv-xy-plot__series,.rv-xy-plot__series path{pointer-events:all}.rv-xy-plot__circular-grid-lines__line{fill-opacity:0;stroke:#e6e6e9}.rv-xy-plot__series,.rv-xy-plot__series path{pointer-events:all}.rv-xy-plot__series--line{fill:none;stroke:#000;stroke-width:2px}.rv-crosshair{position:absolute;font-size:11px;pointer-events:none}.rv-crosshair__line{background:#47d3d9;width:1px}.rv-crosshair__inner{position:absolute;text-align:left;top:0}.rv-crosshair__inner__content{border-radius:4px;background:#3a3a48;color:#fff;font-size:12px;padding:7px 10px;box-shadow:0 2px 4px rgba(0,0,0,0.5)}.rv-crosshair__inner--left{right:4px}.rv-crosshair__inner--right{left:4px}.rv-crosshair__title{font-weight:bold;white-space:nowrap}.rv-crosshair__item{white-space:nowrap}.rv-hint{position:absolute;pointer-events:none}.rv-hint__content{border-radius:4px;padding:7px 10px;font-size:12px;background:#3a3a48;box-shadow:0 2px 4px rgba(0,0,0,0.5);color:#fff;text-align:left;white-space:nowrap}.rv-discrete-color-legend{box-sizing:border-box;overflow-y:auto;font-size:12px}.rv-discrete-color-legend.horizontal{white-space:nowrap}.rv-discrete-color-legend-item{color:#3a3a48;border-radius:1px;padding:9px 
10px}.rv-discrete-color-legend-item.horizontal{display:inline-block}.rv-discrete-color-legend-item.horizontal .rv-discrete-color-legend-item__title{margin-left:0;display:block}.rv-discrete-color-legend-item__color{background:#dcdcdc;display:inline-block;height:2px;vertical-align:middle;width:14px}.rv-discrete-color-legend-item__title{margin-left:10px}.rv-discrete-color-legend-item.disabled{color:#b8b8b8}.rv-discrete-color-legend-item.clickable{cursor:pointer}.rv-discrete-color-legend-item.clickable:hover{background:#f9f9f9}.rv-search-wrapper{display:flex;flex-direction:column}.rv-search-wrapper__form{flex:0}.rv-search-wrapper__form__input{width:100%;color:#a6a6a5;border:1px solid #e5e5e4;padding:7px 10px;font-size:12px;box-sizing:border-box;border-radius:2px;margin:0 0 9px;outline:0}.rv-search-wrapper__contents{flex:1;overflow:auto}.rv-continuous-color-legend{font-size:12px}.rv-continuous-color-legend .rv-gradient{height:4px;border-radius:2px;margin-bottom:5px}.rv-continuous-size-legend{font-size:12px}.rv-continuous-size-legend .rv-bubbles{text-align:justify;overflow:hidden;margin-bottom:5px;width:100%}.rv-continuous-size-legend .rv-bubble{background:#d8d9dc;display:inline-block;vertical-align:bottom}.rv-continuous-size-legend .rv-spacer{display:inline-block;font-size:0;line-height:0;width:100%}.rv-legend-titles{height:16px;position:relative}.rv-legend-titles__left,.rv-legend-titles__right,.rv-legend-titles__center{position:absolute;white-space:nowrap;overflow:hidden}.rv-legend-titles__center{display:block;text-align:center;width:100%}.rv-legend-titles__right{right:0}.rv-radial-chart .rv-xy-plot__series--label{pointer-events:none}


            ]]>
            </style>
            """
        result = '<svg class="rv-xy-plot__inner" xmlns="http://www.w3.org/2000/svg" width="' + str(
            width) + '" height="' + str(
                height) + '">' + template + svg + '</svg>'

        path = os.path.join(self._storage_path, str(id) + '.svg')
        fileOut = open(path, 'wb')
        fileOut.write(result)
        fileOut.close()

        resp.content_type = 'application/json'
        resp.status = falcon.HTTP_200
        resp.data = json.dumps({'status': 'OK'})
示例#2
0
文件: infer.py 项目: jkesanie/lada
    def on_post(self, req, resp):
        """Build the 'ginf' graph from owl:sameAs links and register it.

        Exports the 'gmap' and 'ggroup' graphs, then runs a CONSTRUCT query
        over 'gmap', 'ggen' and 'ggroup' that re-points every statement whose
        object is ?o2 at ?o1 whenever ``?o1 owl:sameAs ?o2`` holds. Predicates
        containing ``gen_`` have that marker removed before the triple is
        added. The result is written to ``ginf.ttl`` and added to the data
        manager as 'ginf'. Responds with ``{"status": "OK"}``.
        """
        main_logger.info("POST /infer")

        queryStr = """
            CONSTRUCT { ?s1 ?p1 ?o1  }
            WHERE {
                ?o1 <http://www.w3.org/2002/07/owl#sameAs> ?o2 .
                ?s1 ?p1 ?o2 .
                ?o2 ?p2 ?o3 .
            }
            """

        self.dm.export('gmap')
        self.dm.export('ggroup')
        qres = self.dm.query(queryStr, ['gmap', 'ggen', 'ggroup'])
        ginf = Graph()
        for row in qres:
            pred = str(row[1])
            if "gen_" in pred:
                # Strip only the first marker. The previous
                # split()/parts[0] + parts[1] form silently discarded
                # everything after a second "gen_" in the predicate.
                ginf.add((row[0], URIRef(pred.replace('gen_', '', 1)),
                          row[2]))
            else:
                ginf.add(row)

        ginf.serialize("ginf.ttl", format="turtle")
        self.dm.add_graph(ginf, 'ginf')

        resp.status = falcon.HTTP_200
        resp.content_type = 'application/json'
        resp.data = json.dumps({"status": "OK"})
示例#3
0
 def on_delete(self, req, resp):
     """Remove the 'excluded' statements of one observation from 'gexc'.

     Query parameters:
         obs: URI of the observation whose ``ns_lada['excluded']``
              statements are removed.

     Always responds with ``{"status": "OK"}``; when ``obs`` is missing
     nothing is removed.
     """
     # Read the parameter once so the logged value and the removed value
     # cannot differ.
     obs = req.get_param('obs')
     main_logger.info('ExcludedObservations DELETE' + (obs or 'empty'))
     if obs:
         # Guarded: a missing parameter used to reach URIRef(None).
         self.dm.remove((URIRef(obs), ns_lada['excluded'], None), ['gexc'])
     resp.status = falcon.HTTP_200
     resp.content_type = 'application/json'
     resp.data = json.dumps({"status": "OK"}, indent=1, sort_keys=True)
示例#4
0
    def on_get(self, req, resp):
        """Return the rows of one worksheet of a publication as JSON.

        Query parameters:
            uri:   subject whose ``ns_lada['filePath']`` value in 'gpubs'
                   names the workbook file.
            sheet: name of the worksheet to read.

        The first row is skipped. Every other row is read left to right up
        to (not including) the first cell holding 'file:', 'comment:' or
        'label:'; empty cells become ''. Rows that contribute no values are
        left out of the response.
        """
        uri = req.get_param('uri')
        sheetName = req.get_param('sheet')
        filePath = self.dm.value(URIRef(uri), ns_lada['filePath'], 'gpubs')
        main_logger.info(filePath)
        workbook = load_workbook(filename=filePath, data_only=True)
        worksheet = workbook[sheetName]

        stop_markers = ('file:', 'comment:', 'label:')
        data = []

        rows = iter(worksheet.iter_rows())
        next(rows, None)  # drop the first row
        for row in rows:
            if not row:
                continue
            values = []
            for cell in row:
                content = cell.value
                if content in stop_markers:
                    # A marker cell ends the data part of the row.
                    break
                values.append('' if content is None else content)
            if values:
                data.append(values)

        resp.status = falcon.HTTP_200
        resp.content_type = 'application/json'
        resp.data = json.dumps(data)
示例#5
0
    def on_delete(self, req, resp):
        """Clear the 'gfiltered' graph and acknowledge with a JSON status."""
        main_logger.info('DELETE filtered')
        self.dm.clear('gfiltered')

        acknowledgement = {"status": "OK"}
        resp.content_type = 'application/json'
        resp.status = falcon.HTTP_200
        resp.data = json.dumps(acknowledgement)
示例#6
0
    def __init__(self, graph_storage, cc_storage, file_storage, lcdURL,
                 lcdPort):
        """Keep the storages and LCD settings, then load the initial data.

        Loading consists of two steps:
        * every file in ``data/corpus-composition/`` is added to
          ``cc_storage`` as Turtle;
        * ``data/corpus-mapping.json`` (abbreviation -> corpus URI) is turned
          into a graph that types each URI as a Corpus and labels it with its
          abbreviation; the graph is added to ``graph_storage`` as 'glcd'
          and exported.
        """
        self.storage = graph_storage
        self.cc_storage = cc_storage
        self.file_storage = file_storage
        self.lcdURL = lcdURL
        self.lcdPort = lcdPort

        # TODO: get this from properties
        self.cc_inputFolder = 'data/corpus-composition/'
        composition_folder = self.cc_inputFolder

        # Load corpus compositions.
        for composition_file in os.listdir(composition_folder):
            main_logger.info('Adding ' + composition_file +
                             ' to the corpus composition graph')
            self.cc_storage.add_file(composition_folder + composition_file,
                                     "turtle", None)

        triple_count = self.cc_storage.count_triples()
        main_logger.info("Number of triples in GCC:" + str(triple_count))

        # Process corpus mapping - this should come from LCD.
        with open('data/corpus-mapping.json') as mapping_file:
            mapping = json.load(mapping_file)

        lcd_graph = Graph()
        for abbr in mapping:
            corpus = URIRef(mapping[abbr])
            corpus_type = URIRef(
                'http://h224.it.helsinki.fi:8080/varieng/types/Corpus')
            lcd_graph.add((corpus, RDF.type, corpus_type))
            lcd_graph.add((corpus, RDFS.label, Literal(abbr)))

        self.storage.add_graph(lcd_graph, 'glcd')
        self.storage.export('glcd')
示例#7
0
    def store_rdf(self, inputFolder):
        """Add every file of *inputFolder* as Turtle to one named graph.

        The graph URI is ``':' + inputFolder``; it is returned so callers
        can address the graph afterwards.
        """
        target_graph = URIRef(':' + inputFolder)
        for entry in os.listdir(inputFolder):
            main_logger.info('File:' + str(entry))
            turtle_path = '/'.join((inputFolder, entry))
            self.dm.add_file(turtle_path, 'turtle', target_graph)

        return target_graph
示例#8
0
    def on_get(self, req, resp):
        """Stream a stored SVG image back to the client as a download.

        Query parameters:
            id: name of the image; the file is read from
                ``<self._storage_path>/<id>.svg``.

        Raises:
            falcon.HTTPNotFound: when no such file can be opened.
        """
        main_logger.info('GET image')
        image_id = req.get_param('id')
        # The id comes straight from the query string; keep only its last
        # path component so it cannot address files outside the storage
        # folder.
        filename = os.path.basename(str(image_id)) + '.svg'
        path = os.path.join(self._storage_path, filename)

        try:
            # Binary mode: the response stream must yield raw bytes.
            image_stream = open(path, 'rb')
        except IOError:
            # Previously an unknown id surfaced as an unhandled error.
            raise falcon.HTTPNotFound()

        resp.set_header('Content-Type', 'application/x-download')
        resp.set_header('Content-Disposition',
                        'attachment;filename=' + filename)
        resp.stream = image_stream
示例#9
0
    def on_post(self, req, resp):
        """List the corpus compositions (and their genres) for given corpora.

        Query parameters:
            pubURI, sheet: concatenated to form the single key of the
                response object.

        The JSON request body must contain ``corpora``: a list of objects
        with ``uri`` and ``label``. For every corpus the corpus-composition
        data is queried for compositions of that corpus; for a composition
        with a genre dimension the concepts of that dimension's code list
        are listed as well.

        Response: ``{pubURI + sheet: [{uri, label, options, selected}, ...]}``
        where each option is ``{uri, label, genres}``.
        """
        pubURI = req.get_param('pubURI')
        sheet = req.get_param('sheet')
        raw_json = req.stream.read()
        obj = json.loads(raw_json.decode('utf-8'))

        data = []

        for corpus in obj['corpora']:
            corpusURI = corpus['uri']
            corpusLabel = corpus['label']
            # NOTE(review): the URI from the request body is interpolated
            # into the SPARQL text unescaped; consider validating it or
            # using bound query variables.
            queryStr = """
                    select distinct ?cc ?label ?dim {
                    ?cc a <http://data.hulib.helsinki.fi/ns/qb4cc#CorpusComposition> .
                    ?cc <http://data.hulib.helsinki.fi/ns/qb4cc#corpus> <%s> .
                    ?cc <http://purl.org/dc/terms/title> ?label .
                    OPTIONAL {
                        ?cc <http://purl.org/linked-data/cube#structure> ?s .
                        ?s <http://data.hulib.helsinki.fi/ns/qb4cc#dimension> ?d .
                        ?d <http://purl.org/linked-data/cube#dimension> ?dim .
                        ?dim a <http://data.hulib.helsinki.fi/ns/qb4cc#Genre> .

                    }

                }
            """ % (corpusURI)
            pubData = []
            for row in self.dm.query_cc(queryStr):
                genres = []
                if row['dim']:
                    main_logger.info(row['dim'])
                    q2 = "select distinct ?uri ?label { <%s> <http://purl.org/linked-data/cube#codeList> ?list . ?uri <http://www.w3.org/2004/02/skos/core#inScheme> ?list . ?uri <http://www.w3.org/2004/02/skos/core#prefLabel> ?label } order by ?label" % (
                        row['dim'])
                    main_logger.info(q2)
                    # Query through the data manager like the outer query.
                    # The previous `gcc.query(q2)` used a name that is not
                    # defined in this method.
                    genres = [{
                        'uri': genre_row['uri'],
                        'label': genre_row['label']
                    } for genre_row in self.dm.query_cc(q2)]
                pubData.append({
                    'uri': row['cc'],
                    'label': row['label'],
                    'genres': genres
                })
            data.append({
                'uri': corpusURI,
                'label': corpusLabel,
                'options': pubData,
                'selected': ''
            })
        resp.status = falcon.HTTP_200
        resp.content_type = 'application/json'
        resp.data = json.dumps({pubURI + sheet: data})
示例#10
0
文件: api.py 项目: jkesanie/lada
def create():
    """Create the API endpoints.

    Builds a falcon API with permissive CORS and multipart middleware,
    creates one DataManager backed by two in-memory storages and registers
    every resource under its route. Returns the configured application.
    """
    cors = CORS(allow_all_origins=True,
                allow_all_methods=True,
                allow_all_headers=True)
    app = falcon.API(middleware=[cors.middleware, MultipartMiddleware()])

    dm = DataManager(InMemoryStorage(), InMemoryStorage(), data_folder,
                     lcdURL, lcdPort)

    # (route, resource class, constructor argument), in registration order.
    routes = (
        ('/annotatedFiles', AnnotatedFiles, dm),
        ('/annotatedFiles/json', AnnotatedFilesJSON, dm),
        ('/publications', GetPublications, dm),
        ('/expression', CubeExpressions, dm),
        ('/corpus', CubeCorpora, dm),
        ('/genre', CubeGenres, dm),
        ('/function', CubeFunctions, dm),
        ('/corpus/groups', GroupCorpora, dm),
        ('/expression/groups', GroupExpressions, dm),
        ('/function/groups', GroupFunctions, dm),
        ('/genre/groups', GroupGenres, dm),
        ('/groups', Groups, dm),
        ('/obs/filtered', FilteredObservations, dm),
        ('/obs/filtered/query', QueryFilteredObservations, dm),
        ('/obs/filtered/result', FilteredResultObservations, dm),
        ('/obs/filtered/preview', FilteredObservationsPreview, dm),
        ('/infer', Infer, dm),
        ('/cc/filtered', CCFiltered, dm),
        ('/normalize', Normalize, dm),
        ('/obs/norm2', CreateNormalizedCube, dm),
        ('/obs/norm/query', QueryNormalizedCube, dm),
        ('/obs/norm/defs', NormalizedCubeDefinitions, dm),
        ('/obs/excluded', ExcludedObservations, dm),
        ('/pub/excluded', ExcludedPublications, dm),
        ('/image', Image, image_folder),
        ('/lcd/status', CheckForLCDConnection, dm),
    )
    for uri_template, resource_class, dependency in routes:
        app.add_route(uri_template, resource_class(dependency))

    main_logger.info('App2 is running.')
    return app
示例#11
0
    def on_post(self, req, resp):
        """Import every uploaded file and report a per-file result.

        Each request parameter is treated as one upload. A file is saved,
        converted to RDF, stored in its own graph and post-processed
        (literal, period and resource transformations); 'ggen' is exported
        and the publication metadata is stored.

        Response: ``{"files": [...]}`` with one entry per parameter holding
        ``filename``, ``path``, ``uri`` and ``status`` ('OK' or 'ERROR').
        A failing file does not stop the remaining files; its entry carries
        whatever ``path``/``uri`` had been produced before the failure
        (``null`` otherwise).
        """
        data = {
            'files': []
        }
        for filename in req.params:
            # Reset per file: the error entry below must never report values
            # from a previous file, nor fail on names that were never bound.
            path = None
            graphURI = None
            try:
                main_logger.info(filename)
                upload = req.get_param(filename)
                path = self.save_file(upload)
                rdfPath = self.generate_rdf(path)
                graphURI = self.store_rdf(rdfPath)
                self.transformLiterals(graphURI, 'data:Expression', 'expression')
                self.transformLiterals(graphURI, 'data:Function', 'function')
                self.transformLiterals(graphURI, 'data:Genre', 'genre')
                self.transformPeriods(graphURI, 'timeperiod')
                self.transformResources(graphURI, 'data:Corpus', 'corpus')

                self.dm.export('ggen')
                pubMetadata = self.get_publication_metadata(graphURI)

                self.store_publication_metadata(pubMetadata, path, rdfPath)

                data['files'].append({
                    'filename': filename,
                    'path': path,
                    'uri': str(graphURI),
                    'status': 'OK'
                })
            except Exception:
                # Deliberately broad so one bad upload cannot abort the whole
                # batch, but no longer swallows SystemExit/KeyboardInterrupt.
                traceback.print_exc(limit=5, file=sys.stdout)

                data['files'].append({
                    'filename': filename,
                    'path': path,
                    'uri': str(graphURI) if graphURI is not None else None,
                    'status': 'ERROR',
                    # TODO: placeholder text, replace with a real message.
                    'message': 'change me'
                })

        resp.status = falcon.HTTP_200
        resp.content_type = 'application/json'
        resp.data = json.dumps(data)
示例#12
0
    def on_delete(self, req, resp):
        """Remove a publication's statements and clear its cube graph.

        Query parameters (both required):
            uri:  subject whose statements are removed via ``self.dm.remove``.
            file: folder name; the graph ``':' + file`` is cleared.

        Responds with ``{"status": "OK"}``. Falcon rejects the request with
        a 400 error when a parameter is missing.
        """
        main_logger.info('Delete: annotatedFiles')
        # Fetch and validate both parameters before touching any data, so a
        # missing 'file' can no longer fail halfway through the deletion.
        uri = req.get_param('uri', required=True)
        folder = req.get_param('file', required=True)

        # TODO: delete the rdf folder and the annotated file as well.

        # Remove the publication's statements.
        self.dm.remove((URIRef(uri), None, None), None)

        # Remove cube data.
        cubeGraphURI = URIRef(':' + folder)
        main_logger.info(cubeGraphURI)
        self.dm.clear(cubeGraphURI)

        resp.status = falcon.HTTP_200
        resp.content_type = 'application/json'
        # The response declared JSON but had no body; send the same
        # acknowledgement as the other handlers.
        resp.data = json.dumps({"status": "OK"})
示例#13
0
    def on_get(self, req, resp):
        """Run the normalisation query and return the normalised value.

        Query parameters:
            base, absValue: integers, default 0.
            ccURI, startYear, endYear: inserted into ``self.query_template``.
            ccgenres: optional list of genre URIs; when given, a genre
                FILTER clause is added to the query.
            pub, sheet: required; concatenated to form the outer response key.
            ckey: inner response key.

        Response: ``{pub + sheet: {ckey: value}}`` where value is the
        ``normalizedValue`` of the last result row, or -1 when the query
        returns no rows.
        """
        # The second positional argument of get_param_as_int() is
        # ``required``, so the original ('base', 0) never supplied a default
        # and a missing parameter put None into the query.
        normalizationBase = req.get_param_as_int('base') or 0
        absValue = req.get_param_as_int('absValue') or 0
        ccURI = req.get_param('ccURI')
        startYear = req.get_param('startYear')
        endYear = req.get_param('endYear')
        # Required up front: they were concatenated unconditionally below,
        # which failed with a TypeError only after the query had been run.
        pub = req.get_param('pub', required=True)
        sheet = req.get_param('sheet', required=True)
        ckey = req.get_param('ckey')
        ccGenres = req.get_param_as_list('ccgenres')

        genrePart = ''
        if ccGenres:
            gfilters = ['<' + genre_uri + '>' for genre_uri in ccGenres]
            genrePart = '?part qb4cc:genre ?genre . FILTER (?genre IN(' + ','.join(
                gfilters) + ')) .'

        # NOTE(review): request values are formatted into the SPARQL text
        # unescaped; consider validating them.
        query = self.query_template.format(normalizationBase, absValue, ccURI,
                                           startYear, endYear, genrePart)

        main_logger.info(query)
        qres = self.dm.query_cc(query)
        normValue = -1
        for row in qres:
            main_logger.info(row)
            normValue = row['normalizedValue']

        resp.status = falcon.HTTP_200
        resp.content_type = 'application/json'
        key = pub + sheet
        resp.data = json.dumps({key: {ckey: normValue}})
示例#14
0
    def on_post(self, req, resp):
        """Build the 'gresult' cube from the posted observation clusters.

        The JSON body maps arbitrary keys to lists of clusters. A cluster
        with a truthy ``selected`` entry contributes one observation:
        * the frequency is taken from ``values[selected]['freq']``;
        * the observation URI is ``selected`` unless some entry of
          ``values`` has ``per == 1``, in which case that entry's ``obs`` is
          used (the last such entry wins).

        The graph is written to ``gresult3.ttl`` and added to the data
        manager as 'gresult'. No response fields are set here.
        """
        main_logger.info('POST norm2')
        body = req.stream.read()
        payload = json.loads(body.decode('utf-8'))
        main_logger.info(payload)

        dataset = ns_lada['cube']
        structure = ns_lada['structure']

        gresult = Graph()
        gresult.add((dataset, RDF.type, ns_cube['DataSet']))
        gresult.add((dataset, ns_cube['structure'], structure))

        for key in payload:
            main_logger.info(key)
            for cluster in payload[key]:
                selected = cluster['selected']
                if not selected:
                    continue

                observation = URIRef(selected)
                values = cluster['values']
                chosen = values[selected]

                # Prefer the observation whose 'per' equals 1.
                for candidate in values.values():
                    if candidate['per'] == 1:
                        observation = URIRef(candidate['obs'])

                gresult.add((observation, RDF.type, ns_cube['Observation']))
                gresult.add((observation, ns_lada['frequency'],
                             Literal(float(chosen['freq']))))

        gresult.serialize("gresult3.ttl", format="turtle")
        self.dm.add_graph(gresult, 'gresult')
示例#15
0
    def on_post(self, req, resp):
        """Query the normalised cube and shape the result as chart series.

        The JSON body must contain:
            dimension: object with ``id``; may nest a second ``dimension``.
            slices:    list of filter descriptions; entries of type 'value'
                       go through ``create_value_filter``, all others
                       through ``create_filter``.

        Frequencies are summed per dimension value (one dimension) or per
        pair of values (two dimensions).

        Response: ``{"results": [...], "legend": [...]}``.
        * One dimension: ``results`` holds one series of ``{x, y}`` points
          and ``legend`` is empty.
        * Two dimensions: ``legend`` lists the sorted distinct values of the
          second dimension and ``results`` holds one series per legend
          entry with a ``{x, y, label}`` point for every value of the first
          dimension (``y`` is null where no data exists).

        Raises:
            falcon.HTTPBadRequest: when the request body is empty.
        """
        raw_json = req.stream.read()
        test = None
        if len(raw_json) > 0:
            test = json.loads(raw_json.decode('utf-8'))

        main_logger.info(test)
        if test is None:
            # An empty body used to fail with a TypeError further down.
            raise falcon.HTTPBadRequest(
                title='Missing request body',
                description='A JSON query description is required.')

        dimension = test['dimension']
        has_second_dimension = 'dimension' in dimension

        variables = ['?' + dimension['id'] + '_label']
        if has_second_dimension:
            variables.append('?' + dimension['dimension']['id'] + '_label')

        slicePart = ''
        for sc in test['slices']:
            if sc['type'] == 'value':
                slicePart = slicePart + self.create_value_filter(sc)
            else:
                slicePart = slicePart + self.create_filter(sc)

        dimensionPart = self.create_filter(dimension)
        if has_second_dimension:
            dimensionPart = dimensionPart + self.create_filter(
                dimension['dimension'])

        query = """
            select {0} ?obs ?value {{
                ?obs a <http://purl.org/linked-data/cube#Observation> .
                ?obs <http://lada/frequency> ?value .
                {1}
                {2}
            }} order by {0}
        """.format(' '.join(variables), slicePart, dimensionPart)

        main_logger.info(query)
        results = []
        legendItems = []

        qres = self.dm.query(query, ['gresult', 'ggroup', 'ggen', 'ginf'])

        if has_second_dimension:
            dim1Var = variables[0][1:]  # variable names without the '?'
            dim2Var = variables[1][1:]
            dim1Data = []
            dim2Data = []  # legend candidates, may contain duplicates
            dim2Dict = {}  # dim1 value -> {dim2 value -> summed frequency}
            previousDim1Value = None
            previousDim2Value = None
            # Truth-test the result instead of calling __nonzero__ directly.
            if qres:
                for row in qres:
                    # NOTE(review): this check looks like it is always true
                    # (the query selects no ?count); confirm before removing.
                    if str(row.count) != 'None':
                        dim1Value = str(row[dim1Var])
                        dim2Value = str(row[dim2Var])
                        amount = float(row['value'])

                        if previousDim2Value != dim2Value:
                            dim2Data.append(dim2Value)

                        totals = dim2Dict.setdefault(dim1Value, {})
                        if dim2Value in totals:
                            totals[dim2Value] = totals[dim2Value] + amount
                        else:
                            totals[dim2Value] = amount
                        main_logger.info('added %s to %s:%s', row['value'],
                                         dim1Value, dim2Value)

                        if previousDim1Value != dim1Value:
                            dim1Data.append(dim1Value)

                        previousDim1Value = dim1Value
                        previousDim2Value = dim2Value

            main_logger.info(dim1Data)
            main_logger.info(dim2Data)
            main_logger.info(dim2Dict)

            main_logger.info('step 2')

            # One series per distinct second-dimension value, each covering
            # every first-dimension value so the series line up.
            legendItems = sorted(set(dim2Data))
            for v2 in legendItems:
                results.append([{
                    'x': v1,
                    'y': dim2Dict[v1].get(v2),
                    'label': v2
                } for v1 in sorted(dim2Dict)])

            main_logger.info('done')

        else:
            dimVar = variables[0][1:]
            previousDimValue = None
            total = 0  # was named `sum`, which shadowed the builtin
            if qres:
                _results = []
                for row in qres:
                    # NOTE(review): see the identical check above.
                    if str(row.count) != 'None':
                        dimValue = row[dimVar]
                        if previousDimValue is None or previousDimValue == dimValue:
                            total = total + float(row['value'])
                        else:
                            # The dimension value changed: emit the finished
                            # group and start the next one.
                            _results.append({'x': previousDimValue, 'y': total})
                            total = float(row['value'])

                        previousDimValue = dimValue
                _results.append({'x': previousDimValue, 'y': total})
                results.append(_results)

        data = {}

        data['results'] = results
        data['legend'] = legendItems
        resp.data = json.dumps(data)
        resp.status = falcon.HTTP_200
        resp.content_type = 'application/json'
示例#16
0
    def on_post(self, req, resp):
        main_logger.info('POST query filtered')
        raw_json = req.stream.read()
        obj = json.loads(raw_json.decode('utf-8'))
        main_logger.info(obj)

        expressions = obj['expression']
        expFilters = []
        for f in expressions:
            if (f['type'] != 'group'):
                expFilters.append('<' + f['uri'] + '>')

        corpora = obj['corpus']
        corpusFilters = []
        for f in corpora:
            if (f['type'] != 'group'):
                corpusFilters.append('<' + f['uri'] + '>')

        genres = obj['genre']
        genreFilters = []
        for f in genres:
            if (f['type'] != 'group'):
                genreFilters.append('<' + f['uri'] + '>')

        functions = obj['function']
        funcFilters = []
        for f in functions:
            if (f['type'] != 'group'):
                funcFilters.append('<' + f['uri'] + '>')

        query = """
            select distinct ?obs ?pub ?title ?year ?authors ?pubExcluded ?ds ?excluded ?row ?col ?sheet ?comment ?freq ?per ?corpus ?corpus2 ?corpusName ?genre ?genreName ?exp ?expName ?func ?funcName ?period ?periodName {
                ?obs a <http://purl.org/linked-data/cube#Observation> .
                ?obs <http://lada/filtered> ?f .
                ?pub <http://lada/file> ?file .
                ?pub <http://purl.org/dc/terms/title> ?title .
                ?pub <http://purl.org/dc/terms/issued> ?year .
                ?pub <http://purl.org/dc/terms/creator> ?authors .
                #FILTER NOT EXISTS {
                OPTIONAL {
                    ?pub <http://lada/excluded> ?pubExcluded
                } .
                ?ds <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/file> ?file .
                ?obs <http://lada/sheet> ?sheet .
                ?obs <http://lada/timeperiod> ?period .
                ?period <http://www.w3.org/2000/01/rdf-schema#label> ?periodName .
                ?obs <http://purl.org/linked-data/cube#dataSet> ?ds .
                OPTIONAL {
                    ?ds <http://www.w3.org/2000/01/rdf-schema#comment> ?comment .
                } .

                ?obs <http://lada/row> ?row .
                ?obs <http://lada/col> ?col .
                ?obs <http://lada/sheet> ?sheet .
                ?obs <http://lada/timeperiod> ?period .
                ?period <http://www.w3.org/2000/01/rdf-schema#label> ?periodName .
                #FILTER NOT EXISTS {
                OPTIONAL {
                    ?obs <http://lada/excluded> ?excluded .
                } .
                ?obs <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/data/frequency> ?freq .
                ?obs <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/data/per> ?per .
                #FILTER(?freq > 0) .
        """

        # noexpression: 1 = no value, 2 = any or no value 3 = some value, none = specific values

        if obj['noexpression']:
            if obj['noexpression'] == 1:
                query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_expression> ?exp } . "
            if obj['noexpression'] == 2:
                query = query + " OPTIONAL { ?obs <http://lada/gen_expression> ?exp . ?exp <http://www.w3.org/2000/01/rdf-schema#label> ?expName . }"
            if obj['noexpression'] == 3:
                query = query + " ?obs <http://lada/gen_expression> ?exp . ?exp <http://www.w3.org/2000/01/rdf-schema#label> ?expName ."
        else:
            if (len(expFilters) > 0):
                joined = ", ".join(expFilters)
                query = query + "?obs <http://lada/expression> ?exp . ?exp <http://www.w3.org/2000/01/rdf-schema#label> ?expName . FILTER(?exp IN(" + joined + ")) . "

        if obj['nocorpus']:
            if obj['nocorpus'] == 1:  # no value
                query = query + " FILTER NOT EXISTS { ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus } . "
            if obj['nocorpus'] == 2:
                query = query + " OPTIONAL { ?obs <http://lada/corpus> ?corpus . ?corpus <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName . ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus2   } .  "
            if obj['nocorpus'] == 3:
                query = query + " ?obs <http://lada/corpus> ?corpus . ?corpus <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName . ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus2 . "
        else:
            # some value
            query = query + """
                ?obs <http://lada/corpus> ?corpus .
                ?corpus <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName .
                # TODO: get rid of this filter!
                ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus2
                """
            if len(corpusFilters) > 0:  # specific values
                joined = ", ".join(corpusFilters)
                query = query + " FILTER(?corpus IN(" + joined + ")) . "

        if obj['nofunction']:
            if obj['nofunction'] == 1:
                query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_function> ?func } . "
            if obj['nofunction'] == 2:
                query = query + "OPTIONAL { ?obs <http://lada/gen_function> ?func . ?func <http://www.w3.org/2000/01/rdf-schema#label> ?funcName . } "
            if obj['nofunction'] == 3:
                query = query + " ?obs <http://lada/gen_function> ?func . ?func <http://www.w3.org/2000/01/rdf-schema#label> ?funcName . "
        else:
            query = query + "?obs <http://lada/function> ?func . ?func <http://www.w3.org/2000/01/rdf-schema#label>  ?funcName . "
            if (len(funcFilters) > 0):
                joined = ", ".join(funcFilters)
                query = query + "FILTER(?func IN(" + joined + ")) . "

        if obj['nogenre']:
            if obj['nogenre'] == 1:
                query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_genre> ?genre } . "
            if obj['nogenre'] == 2:
                query = query + "OPTIONAL { ?obs <http://lada/gen_genre> ?genre . ?genre <http://www.w3.org/2000/01/rdf-schema#label> ?genreName . } "
            if obj['nogenre'] == 3:
                query = query + " ?obs <http://lada/gen_genre> ?genre . ?genre <http://www.w3.org/2000/01/rdf-schema#label> ?genreName ."
        else:
            if (len(genreFilters) > 0):
                joined = ", ".join(genreFilters)
                query = query + "?obs <http://lada/genre> ?genre . ?genre <http://www.w3.org/2000/01/rdf-schema#label> ?genreName . FILTER(?genre IN(" + joined + ")) . "

        query = query + "} order by ?pub ?sheet ?row ?col "

        qres = self.dm.query_all(query)
        data = []
        pubs = {}
        sheets = {}

        # Group by publication -> sheet (ds)
        # pub
        # - sheets[]
        #   - obs[]

        # Group together observations that share common dimension values
        main_logger.info('GO')
        for row in qres:
            #main_logger.info(row)
            pub = row['pub']
            sheet = row['sheet']
            main_logger.info(sheet)
            if not pub in pubs:
                newPub = {
                    'pub': row['pub'],
                    'title': row['title'],
                    'year': row['year'],
                    'authors': row['authors'],
                    'excluded': row['pubExcluded'],
                    'sheets': []
                }
                data.append(newPub)
                pubs[pub] = newPub

            if (pub + sheet) not in sheets:
                newSheet = {
                    'name': row['sheet'],
                    'desc': row['comment'],
                    'obs': []
                }
                pubs[pub]['sheets'].append(newSheet)
                sheets[pub + sheet] = newSheet

            sheets[pub + sheet]['obs'].append({
                'obs': row['obs'],
                'excluded': row['excluded'],
                'row': int(row['row']),
                'col': int(row['col']),
                'freq': row['freq'],
                'per': int(row['per']),
                #'corpus': row['corpus'],
                'corpus2': row['corpus2'],
                'corpusName': row['corpusName'],
                'genre': row['genre'],
                'genreName': row['genreName'],
                'exp': row['exp'],
                'expName': row['expName'],
                'func': row['func'],
                'funcName': row['funcName'],
                'period': row['period'],
                'periodName': row['periodName']
            })
        resp.status = falcon.HTTP_200
        resp.content_type = 'application/json'
        resp.data = json.dumps(data)
示例#17
0
    def on_get(self, req, resp):
        """Return the cube's measures and dimensions as a JSON document.

        Two SPARQL queries (measures, then dimensions) are sent through
        ``self.dm.query`` together with the identifiers ``'gresult'``,
        ``'ggroup'`` and ``'ggen'`` (presumably the graphs to search; confirm
        against the data manager).  Every dimension is additionally expanded
        with the values a client can choose from, depending on its range:

        * ``skos:Concept`` with a code list: ``concepts`` holds the uri/label
          of every resource that is ``skos:inScheme`` of that code list.
        * ``qb4cc:TimePeriod`` with a code list: ``values`` holds the labels
          of the fixed ``http://lada/codelist/period`` scheme.
        * an XML Schema datatype: ``values`` holds the distinct literals used
          with the dimension property.
        * anything else: ``concepts`` holds the distinct objects of the
          property, labelled by ``rdfs:label`` or, failing that, their URI.

        The body written to ``resp`` is
        ``{"measures": [...], "dimensions": [...]}``.
        """
        main_logger.info('GET cube definitions')

        measures = []
        dimensions = []

        # query for measures
        measure_query = """
            select ?measure ?label ?range {
                ?s a <http://purl.org/linked-data/cube#DataStructureDefinition> .
                ?s <http://purl.org/linked-data/cube#component> ?c .
                ?c <http://purl.org/linked-data/cube#measure> ?measure .
                ?c <http://www.w3.org/2000/01/rdf-schema#label> ?label .
                ?measure <http://www.w3.org/2000/01/rdf-schema#range> ?range
            } order by ?label
            """

        main_logger.info(measure_query)
        qres = self.dm.query(measure_query, ['gresult', 'ggroup', 'ggen'])
        # Ordinary truth test instead of calling the Python-2-only
        # ``__nonzero__`` hook by name; this also works with result objects
        # that only implement ``__bool__``/``__len__``.
        if qres:
            for row in qres:
                main_logger.info(row)
                measures.append({
                    'uri': str(row['measure']),
                    'label': str(row['label']),
                    'range': str(row['range'])
                })

        dimension_query = """
            select distinct ?dim ?label ?range ?codelist {
                ?s a <http://purl.org/linked-data/cube#DataStructureDefinition> .
                ?s <http://purl.org/linked-data/cube#component> ?c .
                ?c <http://purl.org/linked-data/cube#dimension> ?dim .
                ?c <http://www.w3.org/2000/01/rdf-schema#label> ?label .
                ?dim <http://www.w3.org/2000/01/rdf-schema#range> ?range
                OPTIONAL {
                    ?dim <http://purl.org/linked-data/cube#codeList> ?codelist
                }
            } order by ?label
            """
        main_logger.info(dimension_query)
        qres = self.dm.query(dimension_query, ['gresult', 'ggroup', 'ggen'])
        if qres:
            main_logger.info('test')
            for row in qres:
                main_logger.info(row)
                dimRange = str(row['range'])
                prop = str(row['dim'])
                obj = {
                    'uri': prop,
                    'label': str(row['label']),
                    'range': dimRange
                }
                # ?codelist comes from an OPTIONAL pattern, so it may be unbound.
                codelist = row['codelist']
                main_logger.info('codelist: ' + str(codelist))
                has_codelist = codelist is not None

                if has_codelist and dimRange == 'http://www.w3.org/2004/02/skos/core#Concept':
                    # Concept-valued dimension: list the members of its code list.
                    qres2 = self.dm.query(
                        """
                        select distinct ?uri ?label {{
                            ?uri <http://www.w3.org/2004/02/skos/core#inScheme> <{0}> .
                            ?uri <http://www.w3.org/2004/02/skos/core#prefLabel> ?label
                        }} order by ?label
                        """.format(codelist), ['gresult', 'ggroup', 'ggen'])
                    obj['concepts'] = [{
                        'uri': row2['uri'],
                        'label': row2['label']
                    } for row2 in qres2]
                elif has_codelist and dimRange == 'http://data.hulib.helsinki.fi/ns/qb4cc#TimePeriod':
                    # NOTE: no .format() is applied to this literal, so the
                    # doubled braces reach the query engine as written (SPARQL
                    # reads them as a nested group).  The scheme is fixed and
                    # not taken from the row's code list, and only 'gresult'
                    # is passed as the second argument.
                    qres2 = self.dm.query(
                        """
                        select distinct ?uri ?label {{
                            ?uri <http://www.w3.org/2004/02/skos/core#inScheme> <http://lada/codelist/period> .
                            ?uri <http://www.w3.org/2004/02/skos/core#prefLabel> ?label
                        }} order by ?label
                        """, ['gresult'])
                    obj['values'] = [row2['label'] for row2 in qres2]
                elif dimRange.startswith(
                        'http://www.w3.org/2001/XMLSchema#'):  # literals
                    qres2 = self.dm.query(
                        """
                        select distinct ?value {{
                            ?uri <{0}> ?value
                        }} order by ?value
                        """.format(prop), ['gresult', 'ggroup', 'ggen'])
                    obj['values'] = [row2['value'] for row2 in qres2]
                else:  # resource-valued: should have rdfs:labels or preflabels
                    qres2 = self.dm.query(
                        """
                        select distinct ?uri ?label {{
                            ?obj  <{0}> ?uri .
                            OPTIONAL {{
                                ?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label
                            }}

                        }} order by ?label
                        """.format(prop), ['gresult', 'ggroup', 'ggen'])
                    # Fall back to the URI when the OPTIONAL label is unbound.
                    obj['concepts'] = [{
                        'uri': row2['uri'],
                        'label': (row2['label'] if row2['label'] is not None
                                  else row2['uri'])
                    } for row2 in qres2]

                dimensions.append(obj)

        main_logger.info('done')
        results = {
            'measures': measures,
            'dimensions': dimensions
        }
        resp.data = json.dumps(results)
        resp.status = falcon.HTTP_200
        resp.content_type = 'application/json'
示例#18
0
    def on_post(self, req, resp):
        """Mark the observation named by the ``obs`` request parameter as excluded.

        Adds the triple ``(URIRef(obs), ns_lada['excluded'], Literal(1))``
        through ``self.dm.add_triple`` with ``'gexc'`` as its second argument.

        Raises:
            falcon.HTTPMissingParam: when ``obs`` is absent or empty.  The
                handler used to log ``'empty'`` and then still build a
                ``URIRef`` from the missing value.
        """
        obs = req.get_param('obs')
        main_logger.info("ExcludedObservations POST with " + (obs or 'empty'))

        if not obs:
            # Refuse the request instead of storing a triple whose subject
            # was derived from a missing parameter.
            raise falcon.HTTPMissingParam('obs')

        self.dm.add_triple((URIRef(obs), ns_lada['excluded'], Literal(1)), 'gexc')
示例#19
0
    def on_post(self, req, resp):
        """Rebuild the stored cube definition from the posted selection.

        The request body is a JSON object.  Keys read here are the flags
        ``nocorpus``, ``noexpression``, ``nofunction`` and ``nogenre``, and
        the lists ``corpus``, ``expression``, ``function``, ``genre`` and
        ``timeperiod`` whose entries provide ``uri`` and ``label``.

        ``self.dm.clear('gresult')`` is called first.  A new graph is then
        assembled with the data structure definition, the single frequency
        measure, one dimension component per selected dimension and, for each
        non-empty list, a code list whose members get ``RDFS.label``,
        ``SKOS.prefLabel`` and ``SKOS.inScheme``.  The graph is serialized to
        ``gresult.ttl`` and passed to ``self.dm.add_graph(..., 'gresult')``.
        The parsed request object is echoed back as the JSON response.
        """
        main_logger.info('POST cube definitions')
        raw_json = req.stream.read()
        obj = json.loads(raw_json.decode('utf-8'))
        main_logger.info(obj)

        self.dm.clear('gresult')

        gresult = Graph()
        s = ns_lada['structure']

        def add_dimension(label, dim, dim_type, dim_range):
            # Register `dim` as a labelled dimension component of the
            # structure and declare its type and range.
            component = BNode()
            gresult.add((s, ns_cube['component'], component))
            gresult.add((component, RDFS.label, Literal(label)))
            gresult.add((component, ns_cube['dimension'], dim))
            gresult.add((dim, RDF.type, dim_type))
            gresult.add((dim, RDFS.range, dim_range))

        def add_codelist(dim, codelist, members):
            # Attach `codelist` to `dim` and describe every member of it.
            gresult.add((dim, ns_cube['codeList'], codelist))
            for member in members:
                uri = URIRef(member['uri'])
                label = Literal(member['label'])
                gresult.add((uri, RDFS.label, label))
                gresult.add((uri, SKOS.prefLabel, label))
                gresult.add((uri, SKOS.inScheme, codelist))

        gresult.add((s, RDF.type, ns_cube['DataStructureDefinition']))

        # only measure is frequency
        mnode = BNode()
        gresult.add((s, ns_cube['component'], mnode))
        gresult.add((mnode, RDFS.label, Literal('normalized frequency')))

        measure = ns_lada['measure#frequency']
        gresult.add((measure, RDF.type, ns_cube['Measure']))
        gresult.add((measure, RDFS.range, ns_xsd['decimal']))
        gresult.add((mnode, ns_cube['measure'], measure))
        # TODO: add normalization base to the measure

        # Corpus is the only dimension whose range is not ns_skos['Concept'].
        if not obj['nocorpus'] or len(obj['corpus']) > 0:
            add_dimension('Corpus', ns_lada['corpus'],
                          ns_qb4cc['corpus'], ns_lcd['Corpus'])
        if len(obj['corpus']) > 0:
            add_codelist(ns_lada['corpus'], ns_lada['codelist/corpus'],
                         obj['corpus'])

        if not obj['noexpression'] or len(obj['expression']) > 0:
            add_dimension('Expression', ns_lada['expression'],
                          ns_qb4cc['expression'], ns_skos['Concept'])
        if len(obj['expression']) > 0:
            add_codelist(ns_lada['expression'], ns_lada['codelist/exp'],
                         obj['expression'])

        if not obj['nofunction'] or len(obj['function']) > 0:
            add_dimension('Function', ns_lada['function'],
                          ns_qb4cc['function'], ns_skos['Concept'])
        if len(obj['function']) > 0:
            add_codelist(ns_lada['function'], ns_lada['codelist/func'],
                         obj['function'])

        # NOTE(review): unlike the dimensions above, genre is also added when
        # the flag is set to anything other than 1.  Kept as found; confirm
        # whether the other dimensions should follow the same rule.
        if not obj['nogenre'] or len(obj['genre']) > 0 or obj['nogenre'] != 1:
            add_dimension('Genre', ns_lada['genre'],
                          ns_qb4cc['genre'], ns_skos['Concept'])
        if len(obj['genre']) > 0:
            add_codelist(ns_lada['genre'], ns_lada['codelist/genre'],
                         obj['genre'])

        # TODO: add other dimension types here

        # The time period dimension exists only when periods were selected.
        if len(obj['timeperiod']) > 0:
            add_dimension('Time period', ns_lada['timeperiod'],
                          ns_qb4cc['timePeriod'], ns_qb4cc['TimePeriod'])
            add_codelist(ns_lada['timeperiod'], ns_lada['codelist/period'],
                         obj['timeperiod'])

        # Also writes the graph to "gresult.ttl" (a relative path).
        gresult.serialize("gresult.ttl", format="turtle")

        self.dm.add_graph(gresult, 'gresult')

        resp.status = falcon.HTTP_200
        resp.content_type = 'application/json'
        resp.data = json.dumps(obj)
示例#20
0
    def on_post(self, req, resp):
        """Preview which publications match the posted filter selection.

        The request body is a JSON object.  For each of ``expression``,
        ``corpus``, ``genre`` and ``function`` it holds a list of entries with
        a ``type`` and a list of ``values``; the values of every entry whose
        type is not ``'group'`` become ``<uri>`` terms of a SPARQL
        ``FILTER(... IN(...))``.  The flags ``noexpression``, ``nocorpus``,
        ``nofunction`` and ``nogenre`` select the pattern instead when set:
        ``1`` requires the property to be absent, ``3`` requires it to be
        present, and ``nocorpus == 2`` makes the corpus pattern optional.

        The response is a JSON list with one object per publication, in the
        order the query returns them: ``title``, ``uri``, ``obs`` (number of
        distinct matching observations) and ``ds`` (number of distinct
        datasets they belong to).

        NOTE(review): the URIs are taken from the request body and spliced
        into the query text unescaped; consider validating them.
        """
        main_logger.info('Preview')

        raw_json = req.stream.read()
        obj = json.loads(raw_json.decode('utf-8'))
        main_logger.info(obj)

        def value_filters(entries):
            # '<uri>' terms for every value of every non-group entry.
            terms = []
            for entry in entries:
                if entry['type'] != 'group':
                    terms.extend('<' + value + '>' for value in entry['values'])
            return terms

        expFilters = value_filters(obj['expression'])
        corpusFilters = value_filters(obj['corpus'])
        genreFilters = value_filters(obj['genre'])
        funcFilters = value_filters(obj['function'])

        query = """
            select distinct ?title ?pub ?ds ?obs {
                ?obs a <http://purl.org/linked-data/cube#Observation> .
                ?obs <http://purl.org/linked-data/cube#dataSet> ?ds .
                ?ds <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/file> ?file .
                ?pub <http://lada/file> ?file .
                ?pub <http://purl.org/dc/terms/title> ?title .
                ?obs <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/data/frequency> ?freq .
                    ?obs <http://lada/timeperiod> ?period .
                    ?period <http://www.w3.org/2000/01/rdf-schema#label> ?periodName .
                #FILTER(?freq > 0) .
        """

        if obj['noexpression']:
            if obj['noexpression'] == 1:
                query += " FILTER NOT EXISTS { ?obs <http://lada/gen_expression> ?exp } . "
            elif obj['noexpression'] == 3:
                query += " ?obs <http://lada/gen_expression> ?exp . "
        elif expFilters:
            joined = ", ".join(expFilters)
            query += "?obs <http://lada/gen_expression> ?exp . FILTER(?exp IN(" + joined + ")) . "

        if obj['nocorpus']:
            if obj['nocorpus'] == 1:  # no value
                query += " FILTER NOT EXISTS { ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus } . "
            elif obj['nocorpus'] == 2:
                query += " OPTIONAL { ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus . ?corpus <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName . } "
            elif obj['nocorpus'] == 3:
                query += " ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus . "
        else:
            # some value
            query += """
                ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus .
                ?corpus <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName .
                """
            if corpusFilters:  # specific values
                joined = ", ".join(corpusFilters)
                query += " FILTER(?corpus IN(" + joined + ")) . "

        if obj['nofunction']:
            if obj['nofunction'] == 1:
                query += " FILTER NOT EXISTS { ?obs <http://lada/gen_function> ?func } . "
            elif obj['nofunction'] == 3:
                query += " ?obs <http://lada/gen_function> ?func . "
        elif funcFilters:
            joined = ", ".join(funcFilters)
            query += "?obs <http://lada/gen_function> ?func . FILTER(?func IN(" + joined + ")) . "

        if obj['nogenre']:
            if obj['nogenre'] == 1:
                query += " FILTER NOT EXISTS { ?obs <http://lada/gen_genre> ?genre } . "
            elif obj['nogenre'] == 3:
                query += " ?obs <http://lada/gen_genre> ?genre . "
        elif genreFilters:
            joined = ", ".join(genreFilters)
            query += "?obs <http://lada/gen_genre> ?genre . FILTER(?genre IN(" + joined + ")) . "

        query += "} order by ?title ?ds ?obs"

        main_logger.info(query)
        qres = self.dm.query_all(query)

        # Count distinct datasets and observations per publication with sets.
        # The earlier running counters skipped the first row, carried the
        # dataset count over into the next publication and over-counted
        # publications in the middle of the result.
        pubs = []   # response entries, in first-seen (query) order
        seen = {}   # pub -> (entry, datasets seen, observations seen)
        for row in qres:
            pub = row['pub']
            if pub not in seen:
                entry = {'title': row['title'], 'uri': pub}
                pubs.append(entry)
                seen[pub] = (entry, set(), set())
            _, datasets, observations = seen[pub]
            datasets.add(row['ds'])
            observations.add(row['obs'])

        for entry, datasets, observations in seen.values():
            entry['obs'] = len(observations)
            entry['ds'] = len(datasets)

        main_logger.info(pubs)

        resp.status = falcon.HTTP_200
        resp.content_type = 'application/json'
        resp.data = json.dumps(pubs)
示例#21
0
    def on_get(self, req, resp):
        main_logger.info('GET filtered')

        query = """
            select distinct ?obs ?pub ?title ?year ?authors ?pubExcluded ?ds ?excluded ?row ?col ?sheet ?comment ?freq ?per ?corpus2 ?corpusName ?genre ?genreName ?exp ?expName ?func ?funcName ?period ?periodName {
                #graph <http://lada/graph/filtered> {
                    ?obs a <http://purl.org/linked-data/cube#Observation> .
                    ?obs <http://lada/filtered> ?f .
                #}
                ?pub <http://lada/file> ?file .
                ?pub <http://purl.org/dc/terms/title> ?title .
                ?pub <http://purl.org/dc/terms/issued> ?year .
                ?pub <http://purl.org/dc/terms/creator> ?authors .
                OPTIONAL {
                    ?pub <http://lada/excluded> ?pubExcluded
                } .
                ?ds <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/file> ?file .
                ?obs <http://purl.org/linked-data/cube#dataSet> ?ds .
                OPTIONAL {
                    ?ds <http://www.w3.org/2000/01/rdf-schema#comment> ?comment .
                } .
                ?obs <http://lada/row> ?row .
                ?obs <http://lada/col> ?col .
                ?obs <http://lada/sheet> ?sheet
                OPTIONAL {
                    ?obs <http://lada/gen_expression> ?exp .
                    ?exp <http://www.w3.org/2000/01/rdf-schema#label> ?expName .
                }
                OPTIONAL {
                    ?obs <http://lada/gen_function> ?func .
                    ?func <http://www.w3.org/2000/01/rdf-schema#label> ?funcName .
                }
                OPTIONAL {
                    ?obs <http://lada/gen_genre> ?genre .
                    ?genre <http://www.w3.org/2000/01/rdf-schema#label> ?genreName .
                }
#                OPTIONAL {
                    ?obs <http://lada/timeperiod> ?period .
                    ?period <http://www.w3.org/2000/01/rdf-schema#label> ?periodName .
#                }


                ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus2 .
                ?corpus2 <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName .
                OPTIONAL {
                    ?obs <http://lada/excluded> ?excluded .
                } .
                ?obs <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/data/frequency> ?freq .
                ?obs <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/data/per> ?per .
                #FILTER(?freq > 0) .
            } order by ?pub ?sheet ?row ?col
        """
        main_logger.info(query)
        qres = self.dm.query_all(query)
        data = []
        pubs = {}
        sheets = {}

        # Group by publication -> sheet (ds)
        # pub
        # - sheets[]
        #   - obs[]

        # Group together observations that share common dimension values
        main_logger.info('GO')
        for row in qres:
            #main_logger.info(row)
            pub = row['pub']
            sheet = row['sheet']
            main_logger.info(sheet)
            if not pub in pubs:
                newPub = {
                    'pub': row['pub'],
                    'title': row['title'],
                    'year': row['year'],
                    'authors': row['authors'],
                    'excluded': row['pubExcluded'],
                    'sheets': []
                }
                data.append(newPub)
                pubs[pub] = newPub

            if (pub + sheet) not in sheets:
                newSheet = {
                    'name': row['sheet'],
                    'desc': row['comment'],
                    'obs': []
                }
                pubs[pub]['sheets'].append(newSheet)
                sheets[pub + sheet] = newSheet

            sheets[pub + sheet]['obs'].append({
                'obs': row['obs'],
                'excluded': row['excluded'],
                'row': int(row['row']),
                'col': int(row['col']),
                'freq': row['freq'],
                'per': int(row['per']),
                #'corpus': row['corpus'],
                'corpus2': row['corpus2'],
                'corpusName': row['corpusName'],
                'genre': row['genre'],
                'genreName': row['genreName'],
                'exp': row['exp'],
                'expName': row['expName'],
                'func': row['func'],
                'funcName': row['funcName'],
                'period': row['period'],
                'periodName': row['periodName']
            })
        resp.status = falcon.HTTP_200
        resp.content_type = 'application/json'
        resp.data = json.dumps(data)
示例#22
0
    def on_post(self, req, resp):
        main_logger.info('GET filtered result')
        raw_json = req.stream.read()
        obj = json.loads(raw_json.decode('utf-8'))
        main_logger.info(obj)

        #corpus = req.get_param('corpus', None)
        #exp = req.get_param('exp', None)
        #func = req.get_param('func', None)
        #genre = req.get_param('genre', None)

        expressions = obj['expression']
        expFilters = []
        for f in expressions:
            if (f['type'] != 'group'):
                expFilters.append('<' + f['uri'] + '>')

        corpora = obj['corpus']
        corpusFilters = []
        for f in corpora:
            if (f['type'] != 'group'):
                corpusFilters.append('<' + f['uri'] + '>')

        genres = obj['genre']
        genreFilters = []
        for f in genres:
            if (f['type'] != 'group'):
                genreFilters.append('<' + f['uri'] + '>')

        functions = obj['function']
        funcFilters = []
        for f in functions:
            if (f['type'] != 'group'):
                funcFilters.append('<' + f['uri'] + '>')

        query = """
            select distinct ?obs ?freq ?per ?corpus ?corpusName ?genre ?genreName ?exp ?expName ?func ?funcName ?period ?periodName {
                ?obs a <http://purl.org/linked-data/cube#Observation> .
                ?obs <http://lada/filtered> ?f .
                ?pub <http://lada/file> ?file .
                ?pub <http://purl.org/dc/terms/title> ?title .
                ?pub <http://purl.org/dc/terms/issued> ?year .
                ?pub <http://purl.org/dc/terms/creator> ?authors .
                FILTER NOT EXISTS {
                    ?pub <http://lada/excluded> ?pubExcluded
                } .
                ?ds <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/file> ?file .
                ?obs <http://purl.org/linked-data/cube#dataSet> ?ds .
                ?obs <http://lada/sheet> ?sheet .
#                OPTIONAL {
#                    ?obs <http://lada/function> ?func .
#                    ?func <http://www.w3.org/2000/01/rdf-schema#label> ?funcName .
#                }
#                OPTIONAL {
#                    ?obs <http://lada/genre> ?genre .
#                    ?genre <http://www.w3.org/2000/01/rdf-schema#label> ?genreName .
#                }
#                OPTIONAL {
                    ?obs <http://lada/timeperiod> ?period .
                    ?period <http://www.w3.org/2000/01/rdf-schema#label> ?periodName .
#                }
                FILTER NOT EXISTS {
                    ?obs <http://lada/excluded> ?excluded .
                } .
                ?obs <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/data/frequency> ?freq .
                ?obs <http://data.hulib.helsinki.fi/id/ontology/varieng/v1/data/per> ?per .
                #FILTER(?freq > 0) .
        """

        # noexpression: 1 = no value, 2 = any or no value 3 = some value, none = specific values

        if obj['noexpression']:
            if obj['noexpression'] == 1:
                query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_expression> ?exp } . "
            if obj['noexpression'] == 2:
                query = query + " OPTIONAL { ?obs <http://lada/gen_expression> ?exp . ?exp <http://www.w3.org/2000/01/rdf-schema#label> ?expName . }"
            if obj['noexpression'] == 3:
                query = query + " ?obs <http://lada/gen_expression> ?exp . ?exp <http://www.w3.org/2000/01/rdf-schema#label> ?expName ."
        else:
            if (len(expFilters) > 0):
                joined = ", ".join(expFilters)
                query = query + "?obs <http://lada/expression> ?exp . ?exp <http://www.w3.org/2000/01/rdf-schema#label> ?expName . FILTER(?exp IN(" + joined + ")) . "

        if obj['nocorpus']:
            if obj['nocorpus'] == 1:  # no value
                query = query + " FILTER NOT EXISTS { ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus } . "
            if obj['nocorpus'] == 2:
                query = query + " OPTIONAL { ?obs <http://lada/corpus> ?corpus . ?corpus <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName . FILTER NOT EXISTS { ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus  } } .  "
            if obj['nocorpus'] == 3:
                query = query + " ?obs <http://lada/corpus> ?corpus . ?corpus <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName .  "
        else:
            # some value
            query = query + """
                ?obs <http://lada/corpus> ?corpus .
                ?corpus <http://www.w3.org/2000/01/rdf-schema#label> ?corpusName .
                # TODO: get rid of this filter!
                FILTER NOT EXISTS { ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus  }
                """
            if len(corpusFilters) > 0:  # specific values
                joined = ", ".join(corpusFilters)
                query = query + " FILTER(?corpus IN(" + joined + ")) . "

        if obj['nofunction']:
            if obj['nofunction'] == 1:
                query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_function> ?func } . "
            if obj['nofunction'] == 2:
                query = query + "OPTIONAL { ?obs <http://lada/gen_function> ?func . ?func <http://www.w3.org/2000/01/rdf-schema#label> ?funcName . } "
            if obj['nofunction'] == 3:
                query = query + " ?obs <http://lada/gen_function> ?func . ?func <http://www.w3.org/2000/01/rdf-schema#label> ?funcName . "
        else:
            if (len(funcFilters) > 0):
                joined = ", ".join(funcFilters)
                query = query + "?obs <http://lada/function> ?func . ?func <http://www.w3.org/2000/01/rdf-schema#label> ?funcName . FILTER(?func IN(" + joined + ")) . "

        if obj['nogenre']:
            if obj['nogenre'] == 1:
                query = query + " FILTER NOT EXISTS { ?obs <http://lada/gen_genre> ?genre } . "
            if obj['nogenre'] == 2:
                query = query + "OPTIONAL { ?obs <http://lada/gen_genre> ?genre . ?genre <http://www.w3.org/2000/01/rdf-schema#label> ?genreName . } "
            if obj['nogenre'] == 3:
                query = query + " ?obs <http://lada/gen_genre> ?genre . ?genre <http://www.w3.org/2000/01/rdf-schema#label> ?genreName ."
        else:
            if (len(genreFilters) > 0):
                joined = ", ".join(genreFilters)
                query = query + "?obs <http://lada/genre> ?genre . ?genre <http://www.w3.org/2000/01/rdf-schema#label> ?genreName . FILTER(?genre IN(" + joined + ")) . "

        query = query + "} order by ?corpusName ?periodName ?expName ?genreName ?funcName "

        main_logger.info(query)
        qres = self.dm.query_all(query)
        data = []

        for row in qres:
            data.append({
                'obs': row['obs'],
                'freq': row['freq'],
                'per': int(row['per']),
                'corpus': row['corpus'],
                'corpusName': row['corpusName'],
                'genre': row['genre'],
                'genreName': row['genreName'],
                'exp': row['exp'],
                'expName': row['expName'],
                'func': row['func'],
                'funcName': row['funcName'],
                'period': row['period'],
                'periodName': row['periodName']
            })
        resp.status = falcon.HTTP_200
        resp.content_type = 'application/json'
        resp.data = json.dumps(data)
示例#23
0
    def on_post(self, req, resp):
        """Rebuild the 'gfiltered' graph from the filter selection in the body.

        The request body is a UTF-8 JSON object. For each of the dimensions
        ``expression``, ``corpus``, ``genre`` and ``function`` it may carry:

        * a list of entries (dicts with ``type`` and ``uri``); entries whose
          ``type`` is ``'group'`` are ignored, and
        * a ``no<dimension>`` mode: 1 = no value, 2 = any or no value,
          3 = some value, falsy/absent = restrict to the listed URIs.

        A SPARQL CONSTRUCT query that marks every matching observation with
        ``<http://lada/filtered> 1`` is passed to ``self.dm.query_all``. The
        returned triples are collected into a new graph, which is serialized
        to ``gfiltered.ttl`` and handed to ``self.dm.add_graph`` under the
        name ``'gfiltered'``. The response is the JSON ``{"status": "OK"}``.

        Raises:
            falcon.HTTPBadRequest: if the body is not a JSON object, or if a
                submitted URI contains characters that are not allowed inside
                a SPARQL IRI reference.
        """
        main_logger.info('POST filtered')
        raw_json = req.stream.read()
        try:
            obj = json.loads(raw_json.decode('utf-8'))
        except ValueError:
            # ValueError covers both undecodable bytes and malformed JSON.
            raise falcon.HTTPBadRequest(
                title='Invalid request body',
                description='The request body must be UTF-8 encoded JSON.')
        if not isinstance(obj, dict):
            raise falcon.HTTPBadRequest(
                title='Invalid request body',
                description='The request body must be a JSON object.')
        main_logger.info(obj)

        exp_terms = self._uri_filter_terms(obj.get('expression'))
        corpus_terms = self._uri_filter_terms(obj.get('corpus'))
        genre_terms = self._uri_filter_terms(obj.get('genre'))
        func_terms = self._uri_filter_terms(obj.get('function'))

        query = """
            construct {
                #?obs a <http://purl.org/linked-data/cube#Observation>
                ?obs <http://lada/filtered> 1
            }
            where {
        """

        query += self._dimension_clause(
            obj.get('noexpression'), exp_terms,
            'http://lada/gen_expression', 'http://lada/expression', '?exp')

        # The corpus dimension uses different predicates and an extra
        # exclusion filter, so it is not routed through _dimension_clause.
        corpus_mode = obj.get('nocorpus')
        if corpus_mode:
            if corpus_mode == 1:  # no value
                query += " FILTER NOT EXISTS { ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus } . "
            elif corpus_mode == 2:  # any or no value
                query += " ?obs a <http://purl.org/linked-data/cube#Observation> . "
            # NOTE(review): mode 3 ("some value") adds no pattern here, unlike
            # the other dimensions -- confirm whether that is intentional.
        else:
            # Specific values (or any corpus when no URIs were listed).
            query += """
                ?obs <http://lada/corpus> ?corpus .
                # TODO: get rid of this filter!
                FILTER NOT EXISTS { ?obs <http://h224.it.helsinki.fi:8080/varieng/data/Corpus> ?corpus  }
                """
            if corpus_terms:
                query += " FILTER(?corpus IN(" + ", ".join(corpus_terms) + ")) . "

        query += self._dimension_clause(
            obj.get('nofunction'), func_terms,
            'http://lada/gen_function', 'http://lada/function', '?func')

        query += self._dimension_clause(
            obj.get('nogenre'), genre_terms,
            'http://lada/gen_genre', 'http://lada/genre', '?genre')

        query += "}"

        main_logger.info(query)
        qres = self.dm.query_all(query)
        gfiltered = Graph()
        for triple in qres:
            gfiltered.add(triple)

        gfiltered.serialize("gfiltered.ttl", format="turtle")
        self.dm.add_graph(gfiltered, 'gfiltered')
        resp.status = falcon.HTTP_200
        resp.content_type = 'application/json'
        # Falcon documents resp.data as a byte string, so encode explicitly.
        resp.data = json.dumps({'status': 'OK'}).encode('utf-8')

    @staticmethod
    def _uri_filter_terms(entries):
        """Return the ``<uri>`` SPARQL terms for all non-group *entries*.

        *entries* is a list of dicts with ``type`` and ``uri`` keys; ``None``
        or an empty list yields an empty result.

        Raises:
            falcon.HTTPBadRequest: if a URI contains a character that the
                SPARQL IRIREF production forbids. Such a URI could otherwise
                close the ``<...>`` term early and inject query text.
        """
        forbidden = '<>"{}|^`\\'
        terms = []
        for entry in entries or ():
            if entry['type'] == 'group':
                continue
            uri = entry['uri']
            term = '<' + uri + '>'
            # Control characters and space (<= U+0020) are also disallowed.
            if any(ch in forbidden or ch <= ' ' for ch in uri):
                raise falcon.HTTPBadRequest(
                    title='Invalid URI',
                    description='Not a valid IRI: %r' % (uri,))
            terms.append(term)
        return terms

    @staticmethod
    def _dimension_clause(mode, terms, gen_predicate, predicate, var):
        """Return the SPARQL fragment that constrains one filter dimension.

        *mode* is the ``no<dimension>`` value from the request: 1 excludes
        observations that have *gen_predicate*, 2 accepts any observation,
        3 requires *gen_predicate*; any other truthy value adds nothing.
        With a falsy *mode* the observation is restricted, via *predicate*,
        to the IRI terms in *terms*; nothing is added when *terms* is empty.
        *var* is the SPARQL variable (including ``?``) bound by the pattern.
        """
        if mode:
            if mode == 1:
                return " FILTER NOT EXISTS { ?obs <" + gen_predicate + "> " + var + " } . "
            if mode == 2:
                return " ?obs a <http://purl.org/linked-data/cube#Observation> . "
            if mode == 3:
                return " ?obs <" + gen_predicate + "> " + var + " . "
            return ""
        if terms:
            joined = ", ".join(terms)
            return "?obs <" + predicate + "> " + var + " . FILTER(" + var + " IN(" + joined + ")) . "
        return ""