Пример #1
0
    def get(self, portalid, date, datasetid):
        """Return the DQV quality graph of a dataset version as JSON-LD.

        The newest stored version of the dataset whose snapshot is not later
        than the snapshot derived from ``date`` is selected.

        Args:
            portalid: identifier of the portal, matched against ``Portal.id``
                and ``Dataset.portalid``.
            date: date value from the request, parsed with ``parseDate``.
            datasetid: identifier of the dataset, matched against ``Dataset.id``.

        Returns:
            The JSON-LD response passed through ``add_memento_header``, or a
            JSON error response with status 404 when no version is found.
        """
        d = parseDate(date)
        sn = getSnapshotfromTime(d)

        session = current_app.config['dbsession']

        # Newest dataset version at or before the requested snapshot.
        dataset = session.query(Dataset) \
            .filter(Dataset.snapshot <= sn) \
            .filter(Dataset.portalid == portalid) \
            .filter(Dataset.id == datasetid) \
            .order_by(Dataset.snapshot.desc()) \
            .first()

        if not dataset:
            # Report the miss as 404, like the schemadotorg by-date handler,
            # instead of an error payload with status 200.
            resp = jsonify({'error': 'There is no version of dataset ' + datasetid + ' available that is older than ' + str(d),
                            'portalid': portalid})
            resp.status_code = 404
            return resp

        snapshot = dataset.snapshot
        p = session.query(Portal).filter(Portal.id == portalid).first()

        dataset_qual = session.query(DatasetQuality) \
            .join(Dataset, DatasetQuality.md5 == Dataset.md5) \
            .filter(Dataset.snapshot == snapshot) \
            .filter(Dataset.portalid == portalid) \
            .filter(Dataset.id == datasetid) \
            .first()

        # get rdf graph and add measures and dimensions
        g, ds_id = dqv_export._get_measures_for_dataset(p, dataset, dataset_qual)
        dqv_export.add_dimensions_and_metrics(g)
        resp = jsonify(json.loads(g.serialize(format="json-ld")))
        timegate = '<' + HOST + '/' + portalid + '/' + datasetid + '/dqv>'
        return add_memento_header(resp, ds_id.n3(), timegate, snapshot)
Пример #2
0
def portalsdynamicy():
    """Render the dynamicity overview of all portals for the current snapshot.

    Joins ``Portal`` with ``PortalSnapshotDynamicity`` for the snapshot derived
    from the current time and copies the dynamicity indicators listed in
    ``keys`` into each portal's dict before rendering the template.

    Returns:
        The result of ``render('odpw_portals_dynamics.jinja', ...)``.
    """
    with Timer(key="get_portalsdynamicy", verbose=True):
        snapshot = getSnapshotfromTime(datetime.datetime.now())
        Session = current_app.config['dbsession']

        with Timer(key="query_portalsdynamicy", verbose=True):
            # Each row is a (Portal, PortalSnapshotDynamicity) pair.
            res = Session.query(Portal) \
                .join(PortalSnapshotDynamicity) \
                .filter(PortalSnapshotDynamicity.snapshot == snapshot) \
                .add_entity(PortalSnapshotDynamicity) \
                .all()

        keys = [
            'dindex', 'changefrequ', 'adddelratio', 'dyratio', 'staticRatio',
            'addRatio', 'delRatio', 'updatedRatio'
        ]
        results = []
        for r in res:
            d = row2dict(r)
            dynamicity = r[1]
            for k in keys:
                d[k] = getattr(dynamicity, k)
            results.append(d)

        # The former pandas DataFrame built from ``results`` was never used,
        # relied on the removed ``Series.convert_objects`` API and raised a
        # KeyError when no portal matched, so it has been dropped.
        return render('odpw_portals_dynamics.jinja',
                      data={'portals': results},
                      keys=keys,
                      snapshot=snapshot)
Пример #3
0
    def get(self, portalid, datasetid):
        """Return the DQV quality graph of a dataset for the negotiated snapshot.

        The snapshot is derived from the ``Accept-Datetime`` request header
        when it is present and non-empty, otherwise ``getCurrentSnapshot()``
        is used.

        Args:
            portalid: identifier of the portal, matched against ``Portal.id``
                and ``Dataset.portalid``.
            datasetid: identifier of the dataset, matched against ``Dataset.id``.

        Returns:
            The JSON-LD response with ``Vary`` and ``Content-Location`` headers
            set, or a JSON error response with status 404 when the dataset has
            no version in the selected snapshot.
        """
        accept_datetime = request.headers.get('Accept-Datetime')
        if accept_datetime:
            sn = getSnapshotfromTime(parse_rfc1123(accept_datetime))
        else:
            sn = getCurrentSnapshot()

        session = current_app.config['dbsession']

        dataset = session.query(Dataset) \
            .filter(Dataset.snapshot == sn) \
            .filter(Dataset.portalid == portalid) \
            .filter(Dataset.id == datasetid) \
            .first()
        if dataset is None:
            # Without this guard a missing dataset is handed to the exporter
            # as None; the by-date DQV handler guards the same call.
            resp = jsonify({'error': 'There is no version of dataset ' + datasetid + ' available for snapshot ' + str(sn),
                            'portalid': portalid})
            resp.status_code = 404
            return resp

        p = session.query(Portal).filter(Portal.id == portalid).first()
        dataset_qual = session.query(DatasetQuality) \
            .join(Dataset, DatasetQuality.md5 == Dataset.md5) \
            .filter(Dataset.snapshot == sn) \
            .filter(Dataset.portalid == portalid) \
            .filter(Dataset.id == datasetid) \
            .first()

        # get rdf graph and add measures and dimensions
        g, ds_id = dqv_export._get_measures_for_dataset(p, dataset, dataset_qual)
        dqv_export.add_dimensions_and_metrics(g)
        resp = jsonify(json.loads(g.serialize(format="json-ld")))
        timegate = '<' + HOST + '/' + portalid + '/' + datasetid + '/dqv>'
        resp = add_memento_header(resp, ds_id.n3(), timegate, sn)

        resp.headers['Vary'] = 'accept-datetime'
        d = tofirstdayinisoweek(sn)
        # NOTE(review): the plain dataset handler builds Content-Location
        # without angle brackets and with "%Y%m%d" -- confirm which form the
        # by-date routes expect before unifying.
        full_url = '<' + HOST + '/' + portalid + '/' + d.strftime("%y%m%d") + '/' + datasetid + '/dqv>'
        resp.headers['Content-Location'] = full_url
        return resp
Пример #4
0
    def get(self, portalid, datasetid):
        """Return the schema.org conversion of a dataset for the negotiated snapshot.

        The snapshot is derived from the ``Accept-Datetime`` request header
        when it is present and non-empty, otherwise ``getCurrentSnapshot()``
        is used.

        Args:
            portalid: identifier of the portal, matched against ``Portal.id``
                and ``Dataset.portalid``.
            datasetid: identifier of the dataset, matched against ``Dataset.id``.

        Returns:
            The JSON response with ``Vary`` and ``Content-Location`` headers
            set, or a JSON error response with status 404 when the dataset has
            no data in the selected snapshot.
        """
        accept_datetime = request.headers.get('Accept-Datetime')
        if accept_datetime:
            sn = getSnapshotfromTime(parse_rfc1123(accept_datetime))
        else:
            sn = getCurrentSnapshot()

        session = current_app.config['dbsession']

        data = session.query(DatasetData) \
            .join(Dataset, DatasetData.md5 == Dataset.md5) \
            .filter(Dataset.snapshot == sn) \
            .filter(Dataset.portalid == portalid) \
            .filter(Dataset.id == datasetid) \
            .first()
        if data is None:
            # ``data.raw`` would raise AttributeError here; answer with 404
            # like the by-date schemadotorg handler does.
            resp = jsonify({'error': 'There is no version of dataset ' + datasetid + ' available for snapshot ' + str(sn),
                            'portalid': portalid})
            resp.status_code = 404
            return resp

        p = session.query(Portal).filter(Portal.id == portalid).first()
        doc = dcat_to_schemadotorg.convert(p, data.raw)
        timegate = '<' + HOST + '/' + portalid + '/' + datasetid + '/schemadotorg>'
        resp = add_memento_header(jsonify(doc), '<' + doc['@id'] + '>', timegate, sn)

        resp.headers['Vary'] = 'accept-datetime'
        d = tofirstdayinisoweek(sn)
        # NOTE(review): the plain dataset handler builds Content-Location
        # without angle brackets and with "%Y%m%d" -- confirm which form the
        # by-date routes expect before unifying.
        full_url = '<' + HOST + '/' + portalid + '/' + d.strftime("%y%m%d") + '/' + datasetid + '/schemadotorg>'
        resp.headers['Content-Location'] = full_url
        return resp
Пример #5
0
def portaldash():
    """Render the portal dashboard listing every ``Portal`` row.

    Returns:
        The result of ``render("odpw_portaldash.jinja", ...)`` with the
        portals as dicts and the snapshot derived from the current time.
    """
    with Timer(key="get_portaldash", verbose=True):
        cursn = getSnapshotfromTime(datetime.datetime.now())
        Session = current_app.config['dbsession']

        portals = []
        with Timer(key="query_portaldash", verbose=True):
            for row in Session.query(Portal).all():
                portals.append(row2dict(row))

        data = {'portals': portals}
        return render("odpw_portaldash.jinja", data=data, snapshot=cursn)
Пример #6
0
def portal(portalid, snapshot=None):
    """Render the detail page of one portal.

    Starting at ``snapshot``, the joined portal / fetch-info / quality row
    (plus dynamicity when the ``dynamicity`` config flag is set) is looked up;
    while no row exists the lookup is repeated for ``getPreviousWeek`` of the
    last snapshot tried.

    Args:
        portalid: identifier of the portal, matched against ``Portal.id``.
        snapshot: snapshot to display. When omitted, the snapshot of the
            current time is computed on each call; the previous default
            expression was evaluated only once, when the function was defined.

    Returns:
        The result of ``render("odpw_portal.jinja", ...)``. The template
        receives the originally requested snapshot, not the one the data was
        eventually found in.
    """
    if snapshot is None:
        snapshot = getSnapshotfromTime(datetime.datetime.now())

    with Timer(key="get_portal", verbose=True):
        current_sn = snapshot
        Session = current_app.config['dbsession']
        data = getPortalInfos(Session, portalid, snapshot)
        dynamicityEnabled = current_app.config.get('dynamicity', False)

        def _first_row(sn):
            # Build the joined query for snapshot ``sn`` and fetch its first row.
            q = Session.query(Portal).filter(Portal.id == portalid) \
                .join(PortalSnapshotQuality, PortalSnapshotQuality.portalid == Portal.id) \
                .filter(PortalSnapshotQuality.snapshot == sn) \
                .join(PortalSnapshot, PortalSnapshot.portalid == Portal.id) \
                .filter(PortalSnapshot.snapshot == sn) \
                .add_entity(PortalSnapshot) \
                .add_entity(PortalSnapshotQuality)

            if dynamicityEnabled:
                q = q.join(PortalSnapshotDynamicity, PortalSnapshotDynamicity.portalid == Portal.id) \
                    .filter(PortalSnapshotDynamicity.snapshot == sn) \
                    .add_entity(PortalSnapshotDynamicity)
            return q.first()

        with Timer(key="query_portal", verbose=True):
            r = _first_row(snapshot)
            # NOTE(review): this loop has no lower bound; if the portal never
            # has a matching row it only ends if getPreviousWeek raises.
            while r is None:
                snapshot = getPreviousWeek(snapshot)
                r = _first_row(snapshot)

            # Row layout: (Portal, PortalSnapshot, PortalSnapshotQuality
            # [, PortalSnapshotDynamicity]).
            data['portal'] = row2dict(r[0])
            data['fetchInfo'] = row2dict(r[1])
            data['fetchInfo']['duration'] = data['fetchInfo']['end'] - data[
                'fetchInfo']['start']

            if dynamicityEnabled:
                data['dynamicity'] = row2dict(r[3])
            data['quality'] = row2dict(r[2])

        #with Timer(key="query_portal_agg", verbose=True):
        #    data.update(aggregatePortalInfo(Session,portalid,snapshot,dbc))
        return render("odpw_portal.jinja",
                      snapshot=current_sn,
                      portalid=portalid,
                      data=data)
Пример #7
0
    def get(self, portalid, datasetid):
        """Return a dataset for the snapshot negotiated via ``Accept-Datetime``.

        Uses the snapshot derived from the ``Accept-Datetime`` header when it
        is present and non-empty, otherwise ``getCurrentSnapshot()``. The
        response from ``get_dataset`` gets ``Vary`` and ``Content-Location``
        headers before it is returned.
        """
        accept_datetime = request.headers.get('Accept-Datetime')
        sn = (getSnapshotfromTime(parse_rfc1123(accept_datetime))
              if accept_datetime else getCurrentSnapshot())

        response = get_dataset(portalid, sn, datasetid)
        response.headers['Vary'] = 'accept-datetime'

        # Content-Location is built from the first day of the snapshot's ISO week.
        week_start = tofirstdayinisoweek(sn)
        location = HOST + '/' + portalid + '/' + week_start.strftime("%Y%m%d")
        location = location + '/' + datasetid
        response.headers['Content-Location'] = location
        return response
Пример #8
0
def portalreport(portalid, snapshot=None):
    """Render the report page of a portal with its distinct organisations.

    Args:
        portalid: identifier of the portal, matched against
            ``Dataset.portalid``.
        snapshot: snapshot to report on. When omitted, the snapshot of the
            current time is computed on each call; the previous default
            expression was evaluated only once, when the function was defined.

    Returns:
        The result of ``render("odpw_portal_report.jinja", ...)``.
    """
    if snapshot is None:
        snapshot = getSnapshotfromTime(datetime.datetime.now())

    with Timer(key="get_portal", verbose=True):

        Session = current_app.config['dbsession']
        data = getPortalInfos(Session, portalid, snapshot)
        with Timer(key="query_portalreport", verbose=True):
            q = Session.query(Dataset.organisation) \
                .filter(Dataset.portalid == portalid) \
                .filter(Dataset.snapshot == snapshot).distinct(Dataset.organisation)

            data['organisations'] = [row2dict(res) for res in q]
        return render("odpw_portal_report.jinja",
                      snapshot=snapshot,
                      portalid=portalid,
                      data=data)
Пример #9
0
def systemchanges():
    """Render the changes between the current and the previous snapshot.

    For every portal present in both snapshots one change is recorded: a
    status change if the status differs; otherwise a dataset-count change if
    that count differs; otherwise a resource-count change if that differs.
    Counts that are ``None`` are reported as 0.

    Returns:
        The result of ``render("odpw_system_changes.jinja", data=...)``.
    """
    with Timer(key="get_systemchanges"):

        Session = current_app.config['dbsession']
        cursn = getSnapshotfromTime(datetime.datetime.now())
        prevWeek = getPreviousWeek(cursn)

        def _by_portal(sn):
            # Map portalid -> PortalSnapshot row for snapshot ``sn``.
            rows = Session.query(PortalSnapshot).filter(
                PortalSnapshot.snapshot == sn)
            return {row.portalid: row for row in rows}

        def _or_zero(value):
            return value if value is not None else 0

        with Timer(key="query_systemchanges"):
            data_cur = _by_portal(cursn)
            data_prev = _by_portal(prevWeek)

        status_change, ds_change, res_change = {}, {}, {}
        for pid, ps in data_cur.items():
            if pid not in data_prev:
                continue
            prev = data_prev[pid]

            if ps.status == prev.status:
                if ps.datasetcount != prev.datasetcount:
                    ds_change[pid] = {
                        'from': _or_zero(prev.datasetcount),
                        'to': _or_zero(ps.datasetcount)
                    }
                elif ps.resourcecount != prev.resourcecount:
                    res_change[pid] = {
                        'from': _or_zero(prev.resourcecount),
                        'to': _or_zero(ps.resourcecount)
                    }
            else:
                status_change[pid] = {'from': prev.status, 'to': ps.status}

        data = {
            'status_change': status_change,
            'ds_change': ds_change,
            'res_change': res_change
        }
        data['from'] = prevWeek
        data['to'] = cursn

        return render("odpw_system_changes.jinja", data=data)
Пример #10
0
def systemfetch():
    """Render the fetch-process chart for the most recent snapshots.

    Collects the ``PortalSnapshot`` rows newer than the last entry returned by
    ``getLastNSnapshots(cursn, n=5)``. Per snapshot, rows are counted and each
    fetch with a known duration becomes an offset in seconds: start time
    relative to the earliest start in that snapshot, plus the duration.

    Returns:
        The result of ``render("odpw_system_fetch.jinja", ...)`` with the plot
        components and inline JS/CSS resources.
    """
    with Timer(key="get_systemfetch"):
        Session = current_app.config['dbsession']

        cursn = getSnapshotfromTime(datetime.datetime.now())
        snapshots = getLastNSnapshots(cursn, n=5)
        nWeeksago = snapshots[-1]

        cnts = defaultdict(int)
        # snapshot -> {start time -> [durations in seconds]}
        by_snapshot = {}
        with Timer(key="query_systemfetch"):
            query = Session.query(
                PortalSnapshot.snapshot,
                PortalSnapshot.start,
                PortalSnapshot.end - PortalSnapshot.start,
            ).filter(PortalSnapshot.snapshot > nWeeksago)
            for sn, start, dur in query:
                cnts[sn] += 1

                starts = by_snapshot.setdefault(sn, {})
                if dur is not None:
                    starts.setdefault(start, []).append(dur.total_seconds())

        data = {}
        for sn, starts in by_snapshot.items():
            offsets = []
            # A snapshot whose rows all lack a duration has no start times;
            # min() on that empty dict used to raise ValueError.
            if starts:
                gstart = min(starts)
                for start, durations in starts.items():
                    base = (start - gstart).total_seconds()
                    offsets.extend(base + dur for dur in durations)
            data[sn] = offsets

        with Timer(key="plot_systemfetch"):
            p = fetchProcessChart(data, cnts)
            script, div = components(p)

            js_resources = INLINE.render_js()
            css_resources = INLINE.render_css()

        return render("odpw_system_fetch.jinja",
                      plot_script=script,
                      plot_div=div,
                      js_resources=js_resources,
                      css_resources=css_resources)
Пример #11
0
def portalOrgareport(portalid, orga, snapshot=None):
    """Render the contact report of one organisation within a portal.

    Args:
        portalid: identifier of the portal, matched against ``Portal.id``.
        orga: organisation passed to ``contactPerOrga`` and to the template.
        snapshot: snapshot to report on. When omitted, the snapshot of the
            current time is computed on each call; the previous default
            expression was evaluated only once, when the function was defined.

    Returns:
        The result of ``render("odpw_portal_report_contacts.jinja", ...)``.
    """
    if snapshot is None:
        snapshot = getSnapshotfromTime(datetime.datetime.now())

    with Timer(key="get_portal", verbose=True):

        Session = current_app.config['dbsession']
        data = getPortalInfos(Session, portalid, snapshot)

        with Timer(key="query_portalreport", verbose=True):
            portal = Session.query(Portal).filter(
                Portal.id == portalid).first()
            data['contacts'] = contactPerOrga(Session, portal, snapshot, orga)

        return render("odpw_portal_report_contacts.jinja",
                      snapshot=snapshot,
                      portalid=portalid,
                      data=data,
                      organisation=orga)
Пример #12
0
def portalsquality():
    """Render the quality overview of all portals for the current snapshot.

    Loads every portal joined with its ``PortalSnapshotQuality`` row for the
    snapshot derived from the current time, coerces the metric columns to
    numbers and aggregates their mean and row count per ``iso`` and per
    ``software``.

    Returns:
        The result of ``render('odpw_portals_quality.jinja', ...)`` with the
        per-portal rows and both aggregations.
    """
    with Timer(key="get_portalsquality", verbose=True):

        Session = current_app.config['dbsession']
        snapshot = getSnapshotfromTime(datetime.datetime.now())

        with Timer(key="query_portalsquality"):
            results = [
                row2dict(r) for r in Session.query(
                    Portal, Portal.datasetcount,
                    Portal.resourcecount).join(PortalSnapshotQuality).filter(
                        PortalSnapshotQuality.snapshot == snapshot).add_entity(
                            PortalSnapshotQuality)
            ]

        # Metric column names come from the module-level ``qa`` definition.
        keys = [i.lower() for q in qa for i in q['metrics']]
        df = pd.DataFrame(results)

        for c in keys:
            # ``Series.convert_objects`` was removed from pandas;
            # ``pd.to_numeric`` with errors='coerce' likewise turns values
            # that cannot be converted into NaN.
            df[c] = pd.to_numeric(df[c], errors='coerce')

        dfiso = df.groupby(['iso'])
        dfiso=dfiso.agg('mean')\
             .join(pd.DataFrame(dfiso.size(),columns=['count']))
        resultsIso = dfiso.reset_index().to_dict(orient='records')

        dfsoft = df.groupby(['software'])
        dfsoft=dfsoft.agg('mean')\
             .join(pd.DataFrame(dfsoft.size(),columns=['count']))
        resultSoft = dfsoft.reset_index().to_dict(orient='records')

        return render('odpw_portals_quality.jinja',
                      data={
                          'portals': results,
                          'iso': resultsIso,
                          'soft': resultSoft
                      },
                      keys=keys,
                      snapshot=snapshot)
Пример #13
0
    def get(self, portalid, date, datasetid):
        """Return the schema.org conversion of a dataset version for a date.

        Picks the ``DatasetData`` row of the newest dataset version whose
        snapshot is not later than the snapshot derived from ``date``.
        Responds with a 404 JSON error when no such row exists.
        """
        d = parseDate(date)
        sn = getSnapshotfromTime(d)

        session = current_app.config['dbsession']
        candidates = session.query(DatasetData).join(
            Dataset, DatasetData.md5 == Dataset.md5)
        candidates = candidates.filter(Dataset.snapshot <= sn)
        candidates = candidates.filter(Dataset.portalid == portalid)
        candidates = candidates.filter(Dataset.id == datasetid)
        data = candidates.order_by(Dataset.snapshot.desc()).first()

        if not data:
            resp = jsonify({'error': 'There is no version of dataset ' + datasetid + ' available that is older than ' + str(d), 'portalid': portalid})
            resp.status_code = 404
            return resp

        p = session.query(Portal).filter(Portal.id == portalid).first()
        doc = dcat_to_schemadotorg.convert(p, data.raw)
        timegate = '<' + HOST + '/' + portalid + '/' + datasetid + '/schemadotorg>'
        original = '<' + doc['@id'] + '>'
        return add_memento_header(jsonify(doc), original, timegate, sn)
Пример #14
0
def licensesearch(uri=None):
    """Render the license search page, optionally with results for ``uri``.

    Args:
        uri: value matched against ``MetaResource.uri``. When ``None`` no
            query is run and the template receives an empty ``data`` dict.

    Returns:
        The result of ``render("odpw_license_search.jinja", data=...)``.
    """
    with Timer(key="get_licensesearch", verbose=True):
        data = {}
        if uri is not None:
            # The search runs against the week before the current snapshot.
            cursn = getPreviousWeek(
                getSnapshotfromTime(datetime.datetime.now()))
            Session = current_app.config['dbsession']

            with Timer(key="query_licensesearch"):
                q = Session.query(Dataset, DatasetData) \
                    .join(MetaResource, Dataset.md5 == MetaResource.md5) \
                    .join(DatasetData, Dataset.md5 == DatasetData.md5) \
                    .filter(Dataset.snapshot == cursn) \
                    .filter(MetaResource.uri == uri)
                results = [row2dict(r) for r in q]

            data['uri'] = uri
            data['snapshot'] = cursn
            data['results'] = results
        return render("odpw_license_search.jinja", data=data)
Пример #15
0
 def get(self, portalid, date, datasetid):
     """Return ``get_dcat`` for the dataset at the snapshot derived from ``date``."""
     snapshot = getSnapshotfromTime(parseDate(date))
     return get_dcat(portalid, datasetid, snapshot)