def get(self, portalid, date, datasetid):
    """Serve the DQV quality graph (JSON-LD) for the newest dataset version
    at or before `date` (Memento-style dated access).

    Returns the graph with Memento headers on success, or a JSON error with
    HTTP 404 when no sufficiently old version exists.
    """
    d = parseDate(date)
    sn = getSnapshotfromTime(d)
    session = current_app.config['dbsession']
    p = session.query(Portal).filter(Portal.id == portalid).first()
    # newest dataset version at or before the requested snapshot
    q = session.query(Dataset) \
        .filter(Dataset.snapshot <= sn) \
        .filter(Dataset.portalid == portalid) \
        .filter(Dataset.id == datasetid) \
        .order_by(Dataset.snapshot.desc())
    dataset = q.first()
    if dataset:
        # quality row is keyed by the dataset's content hash (md5)
        snapshot = dataset.snapshot
        q = session.query(DatasetQuality) \
            .join(Dataset, DatasetQuality.md5 == Dataset.md5) \
            .filter(Dataset.snapshot == snapshot) \
            .filter(Dataset.portalid == portalid) \
            .filter(Dataset.id == datasetid)
        dataset_qual = q.first()
        # get rdf graph and add measures and dimensions
        g, ds_id = dqv_export._get_measures_for_dataset(p, dataset, dataset_qual)
        dqv_export.add_dimensions_and_metrics(g)
        resp = jsonify(json.loads(g.serialize(format="json-ld")))
        timegate = '<' + HOST + '/' + portalid + '/' + datasetid + '/dqv>'
        return add_memento_header(resp, ds_id.n3(), timegate, snapshot)
    else:
        resp = jsonify({'error': 'There is no version of dataset ' + datasetid + ' available that is older than ' + str(d), 'portalid': portalid})
        # FIX: signal "not found" explicitly; the dated schema.org endpoint
        # already returns 404 in this situation — keep the API consistent.
        resp.status_code = 404
        return resp
def portalsdynamicy():
    """Render the portal dynamicity overview for the current snapshot.

    Joins each portal with its dynamicity metrics and passes the merged rows
    to the template.
    """
    with Timer(key="get_portalsdynamicy", verbose=True):
        snapshot = getSnapshotfromTime(datetime.datetime.now())
        Session = current_app.config['dbsession']
        with Timer(key="query_portalsdynamicy", verbose=True):
            res = [r for r in Session.query(Portal).join(PortalSnapshotDynamicity)
                   .filter(PortalSnapshotDynamicity.snapshot == snapshot)
                   .add_entity(PortalSnapshotDynamicity)]
        results = []
        keys = ['dindex', 'changefrequ', 'adddelratio', 'dyratio',
                'staticRatio', 'addRatio', 'delRatio', 'updatedRatio']
        # r is a (Portal, PortalSnapshotDynamicity) row; merge the dynamicity
        # metrics into the portal dict
        for r in res:
            d = row2dict(r)
            for k in keys:
                d[k] = getattr(r[1], k)  # idiomatic form of __getattribute__
            results.append(d)
        df = pd.DataFrame(results)
        for c in keys:
            # FIX: DataFrame.convert_objects() was deprecated in pandas 0.17
            # and removed in 0.25; pd.to_numeric is the supported replacement.
            df[c] = pd.to_numeric(df[c], errors='coerce')
        return render('odpw_portals_dynamics.jinja',
                      data={'portals': results}, keys=keys, snapshot=snapshot)
def get(self, portalid, datasetid):
    """Serve the DQV quality graph for a dataset, honouring Memento
    datetime negotiation via the Accept-Datetime request header.

    Returns JSON-LD with Memento headers, or a JSON 404 error when no
    version exists at the resolved snapshot.
    """
    if request.headers.get('Accept-Datetime'):
        acc_dt = request.headers['Accept-Datetime']
        sn = getSnapshotfromTime(parse_rfc1123(acc_dt))
    else:
        sn = getCurrentSnapshot()
    session = current_app.config['dbsession']
    p = session.query(Portal).filter(Portal.id == portalid).first()
    q = session.query(DatasetQuality) \
        .join(Dataset, DatasetQuality.md5 == Dataset.md5) \
        .filter(Dataset.snapshot == sn) \
        .filter(Dataset.portalid == portalid) \
        .filter(Dataset.id == datasetid)
    dataset_qual = q.first()
    q = session.query(Dataset) \
        .filter(Dataset.snapshot == sn) \
        .filter(Dataset.portalid == portalid) \
        .filter(Dataset.id == datasetid)
    dataset = q.first()
    # FIX: guard against a missing dataset version instead of failing deeper
    # in the export code; mirrors the dated endpoints' 404 behavior.
    if dataset is None:
        resp = jsonify({'error': 'There is no version of dataset ' + datasetid + ' available for snapshot ' + str(sn), 'portalid': portalid})
        resp.status_code = 404
        return resp
    # get rdf graph and add measures and dimensions
    g, ds_id = dqv_export._get_measures_for_dataset(p, dataset, dataset_qual)
    dqv_export.add_dimensions_and_metrics(g)
    resp = jsonify(json.loads(g.serialize(format="json-ld")))
    timegate = '<' + HOST + '/' + portalid + '/' + datasetid + '/dqv>'
    resp = add_memento_header(resp, ds_id.n3(), timegate, sn)
    resp.headers['Vary'] = 'accept-datetime'
    d = tofirstdayinisoweek(sn)
    full_url = '<' + HOST + '/' + portalid + '/' + d.strftime("%y%m%d") + '/' + datasetid + '/dqv>'
    resp.headers['Content-Location'] = full_url
    return resp
def get(self, portalid, datasetid):
    """Serve the schema.org JSON-LD for a dataset, honouring Memento
    datetime negotiation via the Accept-Datetime request header.

    Returns the converted document with Memento headers, or a JSON 404
    error when no version exists at the resolved snapshot.
    """
    if request.headers.get('Accept-Datetime'):
        acc_dt = request.headers['Accept-Datetime']
        sn = getSnapshotfromTime(parse_rfc1123(acc_dt))
    else:
        sn = getCurrentSnapshot()
    session = current_app.config['dbsession']
    q = session.query(DatasetData) \
        .join(Dataset, DatasetData.md5 == Dataset.md5) \
        .filter(Dataset.snapshot == sn) \
        .filter(Dataset.portalid == portalid) \
        .filter(Dataset.id == datasetid)
    data = q.first()
    # FIX: the original dereferenced data.raw without checking the query
    # result, raising AttributeError (HTTP 500) for unknown datasets.
    if data is None:
        resp = jsonify({'error': 'There is no version of dataset ' + datasetid + ' available for snapshot ' + str(sn), 'portalid': portalid})
        resp.status_code = 404
        return resp
    p = session.query(Portal).filter(Portal.id == portalid).first()
    doc = dcat_to_schemadotorg.convert(p, data.raw)
    timegate = '<' + HOST + '/' + portalid + '/' + datasetid + '/schemadotorg>'
    resp = add_memento_header(jsonify(doc), '<' + doc['@id'] + '>', timegate, sn)
    resp.headers['Vary'] = 'accept-datetime'
    d = tofirstdayinisoweek(sn)
    full_url = '<' + HOST + '/' + portalid + '/' + d.strftime("%y%m%d") + '/' + datasetid + '/schemadotorg>'
    resp.headers['Content-Location'] = full_url
    return resp
def portaldash():
    """Render the portal dashboard listing every registered portal."""
    with Timer(key="get_portaldash", verbose=True):
        cursn = getSnapshotfromTime(datetime.datetime.now())
        Session = current_app.config['dbsession']
        data = {}
        with Timer(key="query_portaldash", verbose=True):
            portals = []
            for portal_row in Session.query(Portal).all():
                portals.append(row2dict(portal_row))
            data['portals'] = portals
        return render("odpw_portaldash.jinja", data=data, snapshot=cursn)
def portal(portalid, snapshot=None):
    """Render the detail page for ``portalid`` at ``snapshot``.

    If no data exists for the requested snapshot, walks back week by week
    until a populated snapshot is found.

    :param portalid: id of the portal to display
    :param snapshot: snapshot to render; defaults to the current one
    """
    # FIX: the original default `snapshot=getSnapshotfromTime(datetime.datetime.now())`
    # was evaluated ONCE at import time, so a long-running server kept serving
    # a stale snapshot. Compute it per request instead (None sentinel keeps the
    # signature backward-compatible).
    if snapshot is None:
        snapshot = getSnapshotfromTime(datetime.datetime.now())
    with Timer(key="get_portal", verbose=True):
        current_sn = snapshot
        Session = current_app.config['dbsession']
        data = getPortalInfos(Session, portalid, snapshot)
        dynamicityEnabled = current_app.config.get('dynamicity', False)

        def _build_query(sn):
            # One joined query: portal + fetch info + quality (+ dynamicity).
            # Extracted to remove the verbatim duplication between the first
            # attempt and the fallback loop.
            q = Session.query(Portal).filter(Portal.id == portalid) \
                .join(PortalSnapshotQuality, PortalSnapshotQuality.portalid == Portal.id) \
                .filter(PortalSnapshotQuality.snapshot == sn) \
                .join(PortalSnapshot, PortalSnapshot.portalid == Portal.id) \
                .filter(PortalSnapshot.snapshot == sn) \
                .add_entity(PortalSnapshot) \
                .add_entity(PortalSnapshotQuality)
            if dynamicityEnabled:
                q = q.join(PortalSnapshotDynamicity, PortalSnapshotDynamicity.portalid == Portal.id) \
                    .filter(PortalSnapshotDynamicity.snapshot == sn) \
                    .add_entity(PortalSnapshotDynamicity)
            return q

        with Timer(key="query_portal", verbose=True):
            r = _build_query(snapshot).first()
            # fall back to earlier weeks until a populated snapshot is found
            while r is None:
                snapshot = getPreviousWeek(snapshot)
                r = _build_query(snapshot).first()
        data['portal'] = row2dict(r[0])
        data['fetchInfo'] = row2dict(r[1])
        data['fetchInfo']['duration'] = data['fetchInfo']['end'] - data['fetchInfo']['start']
        if dynamicityEnabled:
            data['dynamicity'] = row2dict(r[3])
        data['quality'] = row2dict(r[2])
        #with Timer(key="query_portal_agg", verbose=True):
        #    data.update(aggregatePortalInfo(Session,portalid,snapshot,dbc))
        return render("odpw_portal.jinja", snapshot=current_sn, portalid=portalid,
                      data=data)
def get(self, portalid, datasetid):
    """Serve the current dataset version, honouring Memento datetime
    negotiation via the Accept-Datetime request header."""
    accept_dt = request.headers.get('Accept-Datetime')
    if accept_dt:
        sn = getSnapshotfromTime(parse_rfc1123(accept_dt))
    else:
        sn = getCurrentSnapshot()
    resp = get_dataset(portalid, sn, datasetid)
    resp.headers['Vary'] = 'accept-datetime'
    # Content-Location points at the dated URL for the resolved snapshot
    week_start = tofirstdayinisoweek(sn)
    resp.headers['Content-Location'] = (
        HOST + '/' + portalid + '/' + week_start.strftime("%Y%m%d") + '/' + datasetid)
    return resp
def portalreport(portalid, snapshot=None):
    """Render the per-portal report page listing the portal's distinct
    organisations at ``snapshot``.

    :param portalid: id of the portal to report on
    :param snapshot: snapshot to use; defaults to the current one
    """
    # FIX: the original default was evaluated once at import time and went
    # stale on a long-running server; compute it per request instead.
    if snapshot is None:
        snapshot = getSnapshotfromTime(datetime.datetime.now())
    with Timer(key="get_portal", verbose=True):
        Session = current_app.config['dbsession']
        data = getPortalInfos(Session, portalid, snapshot)
        with Timer(key="query_portalreport", verbose=True):
            q = Session.query(Dataset.organisation) \
                .filter(Dataset.portalid == portalid) \
                .filter(Dataset.snapshot == snapshot).distinct(Dataset.organisation)
            data['organisations'] = [row2dict(res) for res in q]
        return render("odpw_portal_report.jinja",
                      snapshot=snapshot, portalid=portalid, data=data)
def systemchanges():
    """Render the week-over-week system changes page.

    Compares the current snapshot against the previous week and buckets
    each portal into status, dataset-count, or resource-count changes.
    """
    with Timer(key="get_systemchanges"):
        Session = current_app.config['dbsession']
        cursn = getSnapshotfromTime(datetime.datetime.now())
        prevWeek = getPreviousWeek(cursn)
        with Timer(key="query_systemchanges"):
            data_cur = {r.portalid: r
                        for r in Session.query(PortalSnapshot).filter(
                            PortalSnapshot.snapshot == cursn)}
            data_prev = {r.portalid: r
                         for r in Session.query(PortalSnapshot).filter(
                             PortalSnapshot.snapshot == prevWeek)}
        data = {'status_change': {}, 'ds_change': {}, 'res_change': {}}
        for pid, cur in data_cur.items():
            prev = data_prev.get(pid)
            if prev is None:
                # portal only appears in the current snapshot — nothing to diff
                continue
            if cur.status != prev.status:
                data['status_change'][pid] = {'from': prev.status,
                                              'to': cur.status}
                continue
            # same status: report dataset-count change first, otherwise
            # resource-count change (mutually exclusive, as in the original)
            if cur.datasetcount != prev.datasetcount:
                old_ds = prev.datasetcount if prev.datasetcount is not None else 0
                new_ds = cur.datasetcount if cur.datasetcount is not None else 0
                data['ds_change'][pid] = {'from': old_ds, 'to': new_ds}
            elif cur.resourcecount != prev.resourcecount:
                old_res = prev.resourcecount if prev.resourcecount is not None else 0
                new_res = cur.resourcecount if cur.resourcecount is not None else 0
                data['res_change'][pid] = {'from': old_res, 'to': new_res}
        data['from'] = prevWeek
        data['to'] = cursn
        return render("odpw_system_changes.jinja", data=data)
def systemfetch():
    """Render the fetch-process timing chart over the last five snapshots."""
    with Timer(key="get_systemfetch"):
        Session = current_app.config['dbsession']
        cursn = getSnapshotfromTime(datetime.datetime.now())
        snapshots = getLastNSnapshots(cursn, n=5)
        nWeeksago = snapshots[-1]
        cnts = defaultdict(int)
        data = {}
        with Timer(key="query_systemfetch"):
            rows = Session.query(PortalSnapshot.snapshot,
                                 PortalSnapshot.start,
                                 PortalSnapshot.end - PortalSnapshot.start) \
                .filter(PortalSnapshot.snapshot > nWeeksago)
            # group fetch durations per snapshot, keyed by their start time
            for sn, start, dur in rows:
                cnts[sn] += 1
                by_start = data.setdefault(sn, {})
                if dur is not None:
                    by_start.setdefault(start, []).append(dur.total_seconds())
        # flatten each snapshot's {start: [durations]} map into a list of
        # end offsets relative to the earliest start in that snapshot
        for sn, by_start in data.items():
            earliest = min(by_start.keys())
            offsets = []
            for start, durations in by_start.items():
                base = (start - earliest).total_seconds()
                for dur in durations:
                    offsets.append(base + dur)
            data[sn] = offsets
        with Timer(key="plot_systemfetch"):
            p = fetchProcessChart(data, cnts)
            script, div = components(p)
            js_resources = INLINE.render_js()
            css_resources = INLINE.render_css()
            return render("odpw_system_fetch.jinja",
                          plot_script=script,
                          plot_div=div,
                          js_resources=js_resources,
                          css_resources=css_resources)
def portalOrgareport(portalid, orga, snapshot=None):
    """Render the per-organisation contact report for one portal.

    :param portalid: id of the portal
    :param orga: organisation name to report contacts for
    :param snapshot: snapshot to use; defaults to the current one
    """
    # FIX: the original default was evaluated once at import time and went
    # stale on a long-running server; compute it per request instead.
    if snapshot is None:
        snapshot = getSnapshotfromTime(datetime.datetime.now())
    with Timer(key="get_portal", verbose=True):
        Session = current_app.config['dbsession']
        data = getPortalInfos(Session, portalid, snapshot)
        with Timer(key="query_portalreport", verbose=True):
            portal = Session.query(Portal).filter(
                Portal.id == portalid).first()
            data['contacts'] = contactPerOrga(Session, portal, snapshot, orga)
        return render("odpw_portal_report_contacts.jinja",
                      snapshot=snapshot, portalid=portalid,
                      data=data, organisation=orga)
def portalsquality():
    """Render the portals quality overview, including per-country (iso)
    and per-software mean aggregates."""
    with Timer(key="get_portalsquality", verbose=True):
        Session = current_app.config['dbsession']
        snapshot = getSnapshotfromTime(datetime.datetime.now())
        with Timer(key="query_portalsquality"):
            results = [row2dict(r) for r in
                       Session.query(Portal, Portal.datasetcount,
                                     Portal.resourcecount)
                       .join(PortalSnapshotQuality)
                       .filter(PortalSnapshotQuality.snapshot == snapshot)
                       .add_entity(PortalSnapshotQuality)]
        # flat list of all quality metric column names
        keys = [i.lower() for q in qa for i in q['metrics']]
        df = pd.DataFrame(results)
        for c in keys:
            # FIX: DataFrame.convert_objects() was deprecated in pandas 0.17
            # and removed in 0.25; pd.to_numeric is the supported replacement.
            df[c] = pd.to_numeric(df[c], errors='coerce')
        # mean quality per country, plus how many portals contributed
        dfiso = df.groupby(['iso'])
        dfiso = dfiso.agg('mean') \
            .join(pd.DataFrame(dfiso.size(), columns=['count']))
        resultsIso = dfiso.reset_index().to_dict(orient='records')
        # mean quality per portal software, plus portal count
        dfsoft = df.groupby(['software'])
        dfsoft = dfsoft.agg('mean') \
            .join(pd.DataFrame(dfsoft.size(), columns=['count']))
        resultSoft = dfsoft.reset_index().to_dict(orient='records')
        return render('odpw_portals_quality.jinja',
                      data={
                          'portals': results,
                          'iso': resultsIso,
                          'soft': resultSoft
                      },
                      keys=keys, snapshot=snapshot)
def get(self, portalid, date, datasetid):
    """Serve the schema.org JSON-LD for the newest dataset version at or
    before `date` (Memento-style dated access); 404 if none exists."""
    requested = parseDate(date)
    sn = getSnapshotfromTime(requested)
    session = current_app.config['dbsession']
    data = session.query(DatasetData) \
        .join(Dataset, DatasetData.md5 == Dataset.md5) \
        .filter(Dataset.snapshot <= sn) \
        .filter(Dataset.portalid == portalid) \
        .filter(Dataset.id == datasetid) \
        .order_by(Dataset.snapshot.desc()) \
        .first()
    if not data:
        resp = jsonify({'error': 'There is no version of dataset ' + datasetid + ' available that is older than ' + str(requested), 'portalid': portalid})
        resp.status_code = 404
        return resp
    portal = session.query(Portal).filter(Portal.id == portalid).first()
    doc = dcat_to_schemadotorg.convert(portal, data.raw)
    timegate = '<' + HOST + '/' + portalid + '/' + datasetid + '/schemadotorg>'
    return add_memento_header(jsonify(doc), '<' + doc['@id'] + '>',
                              timegate, sn)
def licensesearch(uri=None):
    """Render the license search page: datasets whose resource metadata
    references ``uri``.

    :param uri: license/resource URI to search for; when None the page is
        rendered without results.
    """
    with Timer(key="get_licensesearch", verbose=True):
        data = {}
        # FIX: `uri != None` replaced with the PEP 8 identity check
        # `uri is not None`; behavior is unchanged.
        if uri is not None:
            cursn = getPreviousWeek(
                getSnapshotfromTime(datetime.datetime.now()))
            Session = current_app.config['dbsession']
            with Timer(key="query_licensesearch"):
                q = Session.query(Dataset, DatasetData) \
                    .join(MetaResource, Dataset.md5 == MetaResource.md5) \
                    .join(DatasetData, Dataset.md5 == DatasetData.md5) \
                    .filter(Dataset.snapshot == cursn) \
                    .filter(MetaResource.uri == uri)
                results = []
                for r in q:
                    results.append(row2dict(r))
                data['uri'] = uri
                data['snapshot'] = cursn
                data['results'] = results
        return render("odpw_license_search.jinja", data=data)
def get(self, portalid, date, datasetid):
    """Serve the DCAT representation of the newest dataset version at or
    before `date`."""
    snapshot = getSnapshotfromTime(parseDate(date))
    return get_dcat(portalid, datasetid, snapshot)