# Dropbox storage provider -- module header and filesystem-level Metadata wrapper.
# (Python 2 module: uses the old `dropbox` v1 SDK, `urlparse`, and backtick repr.)
#
# NOTE(review): `tokens` is imported twice, from `pcapi.db` and then from `db`;
# the second import shadows the first -- confirm which package is intended.
# NOTE(review): `config` and `logtool` are used below but are not imported in
# this chunk -- presumably `from pcapi import config, logtool` exists elsewhere
# or is missing; verify.
from pcapi.db import tokens
from dropbox import client, session, rest
from db import tokens
from urlparse import urlsplit, urlunsplit

### Static Variables ###
# Dropbox application credentials, read from the pcapi configuration file.
APP_KEY = config.get("dropbox", "app_key")
APP_SECRET = config.get("dropbox", "app_secret")
ACCESS_TYPE = 'app_folder'  # should be 'dropbox' or 'app_folder' as configured for your app
# OAuth session state codes reported back to clients.
STATE_CODES = {"verify_token": 0, "connected": 1, "non_authorized": 2}

# CAPABILITIES that this provider supports
CAPABILITIES = ["oauth", "search", "synchronize", "delete"]

log = logtool.getLogger("DropboxProvider", "pcapi")

#########################

class Metadata(object):
    """ metadata of files/dir as returned from dropbox. This is plain filesystem
    metadata and NOT high-level pcapi metadata for records or editors"""
    def __init__(self, md):
        # raw metadata dict as returned by the Dropbox client
        self.md = md
    def __str__(self):
        # Python 2 backtick-repr of the raw metadata dict
        return ` self.md `
    def mtime(self, fmt=None):
        """ Return last modification time of self.

        Args:
""" This module is responsible for the mapping between json properties and postgis SQL Data Definition and Data Manipulation Language (DDL & DML) i.e schema and data""" import json, re from pcapi import logtool log = logtool.getLogger("mapping", "pcapi.publish") def mapping(js_rec,userid): """ Takes records as json and returns and array of [<tablename>, <title>, <DDL>, <DML>] values for SQL substitution. Furthermore, it adds userid and "compulsory QA values" e.g. pos_acc """ # parse record json rec = json.loads(js_rec) # check if table exists -- defined by editor field without the ".edtr" extension # However there is an unresolved FTOpen bug where sometimes the ".edtr" is missing. if( len(rec["properties"]["editor"]) == 41 ): tname = rec["properties"]["editor"][:-5] else: tname = rec["properties"]["editor"] log.debug('Workaround -- record {0} sent without ".edtr" suffix'.format(rec["name"])) tname = whitelist_table(tname) # title is purely for making people using geoserver directly if ( rec["properties"].has_key("title") ): title = rec["properties"]["title"] else:
""" Convert PCAPI's GeoJSON to a simple GeoJSON file that is specifically parsable by QA""" import json, re, sets from pcapi import logtool log = logtool.getLogger("json2qa", "pcapi.publish") def dbg(x): print x def mapping(recs, normalize=True): """ Takes records as json featurecolletion (parsed) and returns a new "flat" featurecolletion with compatible with "simple features" encoding that will rename all properties in a way that doesn't break QA. @param {dict} recs -- JSON (parsed) as produced by PCAPI with id,val,label triplets @param Normalize -- Whether to add null properties where appropriate to make sure all features have the exact same parameters. @returns {dict} flat GeoJSON, compatible with QA """ headers = sets.Set() for i in xrange ( len(recs["features"]) ): dbg("processing {0}".format(str(i))) rec = recs["features"][i] # Move all properties.fields on top to properties and remove fields for p in rec["properties"]["fields"]: h = whitelist_column(p["label"]) # If "val" is missing (this is a common FTOpen regression), add an empty field if not p.has_key("val"): p["val"] = "" value = p["val"] # move properties.fields on top to rec.properties
""" Convert PCAPI's GeoJSON to a simple GeoJSON file that is specifically parsable by QA""" import json, re, sets from pcapi import logtool log = logtool.getLogger("json2qa", "pcapi.publish") def dbg(x): print x def mapping(recs, normalize=True): """ Takes records as json featurecolletion (parsed) and returns a new "flat" featurecolletion with compatible with "simple features" encoding that will rename all properties in a way that doesn't break QA. @param {dict} recs -- JSON (parsed) as produced by PCAPI with id,val,label triplets @param Normalize -- Whether to add null properties where appropriate to make sure all features have the exact same parameters. @returns {dict} flat GeoJSON, compatible with QA """ headers = sets.Set() for i in xrange(len(recs["features"])): dbg("processing {0}".format(str(i))) rec = recs["features"][i] # Move all properties.fields on top to properties and remove fields for p in rec["properties"]["fields"]: h = whitelist_column(p["label"]) # If "val" is missing (this is a common FTOpen regression), add an empty field if not p.has_key("val"): p["val"] = ""
### Initialization code ### import os import psycopg2 import psycopg2.extensions from pcapi.fs_provider import FsProvider from pcapi.publish import mapping, geoserver # Needed for transparent unicode support psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY) from pcapi import config, logtool log = logtool.getLogger("postgis", "pcapi.publish") # full path of PostGIS database host = config.get("pg", "database_host") database = config.get("pg", "database_database") user = config.get("pg", "database_user") password = config.get("pg", "database_password") log.debug("Starting connection with PostGIS database: {0}@{1}".format( user, password)) # When host is not supplied then default to peer (UNIX sockets) authentication conn_string = "dbname={database} user={user}".format(database=database, user=user) if host: conn_string += " host={host} password={password}".format(host=host,
# -*- coding: utf-8 -*- from connection import conn, use_sqlite from pcapi import logtool #################### Dropbox,flickr etc. credential storage management ############# """ Wrapper functions around SQL command use for reading/writing/seaching access credentials for different providers. The result is normal a list of tuples e.g. [(foo,bar)] for one row and (None,) for no rows. NOTE: req_key (in dropbox lingo) is "userid" for pcapi """ log = logtool.getLogger("tokens", "pcapi") def dump_tokens(): c = conn.cursor() res = c.execute(""" SELECT * FROM tokens; """) # 2D list of lists return c.fetchall() def save_access_tokens(userid, req_secret, acc_key, acc_secret): c = conn.cursor() c.execute(SAVE_ACCESS_TOKENS, (userid, req_secret, acc_key, acc_secret, userid)) conn.commit() return c.rowcount == 1 def get_access_pair(userid):
""" Module to Communicate with Geonetworks's REST API. This is very COBWEB specific and is not recomended for people who can avoid custom APIs""" import urllib2, json, base64, threading from pcapi import config, logtool log = logtool.getLogger("geoserver", "pcapi.publish") # make sure this is global for all threads (obviously) lock = threading.Lock() class Surveys: def __init__ (self, surveys): """ API to access the contents of the geonetwork response """ # raw surveys resposne from geonetwork self._surveys = surveys # parsed summary of _surveys as an array of # [ {"sid", "coordinator", "title"} ... self._summary = [] # create a summary of surveys as an array of [ sid, coordinator uid, title ] self.count = int(self._surveys["summary"]["@count"]) if self.count == 0: return # empty surveys -- no point if self.count == 1: # geonetwork bug -- object instead of array for count==1 s = self._surveys["metadata"] self._summary = [ \ { "sid": s["geonet:info"]["uuid"], "coordinator" : s["userinfo"].split('|')[0], "title" : s["title"] } ,]
"""High level, HTTP-agnostic record access and processing""" import re, json from pcapi import logtool from pcapi.provider import filter_utils import time log = logtool.getLogger("provider", "records") class Record(object): """Class to store record bodies and metadata in memory for fast access""" def __init__(self, content, metadata): """Store content and metadata""" self.content = content # as a parsed json (dict) self.metadata = metadata # as a dbox_provider.Metadata object def convertToGeoJSON(records): """ Export all records to geojson and return result. """ # python dicts should be automatically converted to geojson by bottle # self.response.headers['Content-Type'] = 'application/json' features = [] for r in records: # log.debug(r.content) # get first -and only- value of dictionary because records are an array of # [ { <name> : <geojson feature> } ....]
from StringIO import StringIO from operator import itemgetter from wand.image import Image try: import threadpool except ImportError: sys.stderr.write("Error: Can't find threadpool...") from pcapi import ogr, dbox_provider, fs_provider, logtool, config from pcapi.form_validator import FormValidator, Editor from pcapi.cobweb_parser import COBWEBFormParser from pcapi.exceptions import DBException, FsException from pcapi.publish import postgis, geonetwork log = logtool.getLogger("PCAPIRest", "pcapi") #global number of threads default_number = 20 class Record(object): """ Class to store record bodies and metadata in memory for fast access""" def __init__(self, content, metadata): self.content = content # as a parsed json (dict) self.metadata = metadata # as a dbox_provider.Metadata object ################ Decorators #################### def authdec(): def decorator(f): def wrapper(*args, **kwargs):
"""Module to produce a WFS GetCapabilities response. The response includes all available featurestypes """ import os, json from pcapi import logtool, config, fs_provider from pcapi.provider import Records from bottle import template log = logtool.getLogger("WFS", "pcapi.ows") def _error(msg): log.error(msg) return {"error": 1, "response": msg} def dispatch(params, http_response): """Main function that dispatches the right function accrording to HTTP Request and response headers. @param params(dict): request headers """ wfs_version = params["version"] if "version" in params else None wfs_request = params["request"].upper() if "request" in params else None if not wfs_version: return _error("ERROR: WFS version was not specified!") if ((wfs_version != "1.1.0") and (wfs_version != "1.0.0")): return _error("WFS version %s is not supported" % wfs_version) if (wfs_request == "GETCAPABILITIES"): return getcapabilities(params)
#FS Provider tries to be "loosely" dropbox compliant to make your life a bit easier especially regarding the Metadata object # WARNING!!! # No `/' directory will be created unless you upload a file first. This is to avoid # a mess by clients calling /auth/local repeatedly and creating a new users every time without authentication. # Might revisit once we have authentication in place! import os import time import shutil import re from pcapi import logtool, config, helper from pcapi.exceptions import FsException log = logtool.getLogger("FsProvider", "pcapi") class Metadata(object): """ metadata of files/dir as returned from local filesystem. This is plain filesystem metadata and NOT high-level pcapi metadata for records or editors""" def __init__(self, md): self.md = md def __str__(self): return ` self.md ` def mtime(self, fmt=None): """ Return last modification time of self. Args: fmt (optional): format (s. strftime() system call) for the output date
### Static Variables ### APP_KEY = config.get("dropbox","app_key") APP_SECRET = config.get("dropbox","app_secret") ACCESS_TYPE = 'app_folder' # should be 'dropbox' or 'app_folder' as configured for your app STATE_CODES = { "verify_token": 0, "connected": 1, "non_authorized": 2 } # CAPABILITIES that this provider supports CAPABILITIES = [ "oauth", "search", "synchronize", "delete" ] log = logtool.getLogger("DropboxProvider", "pcapi") ######################### class Metadata(object): """ metadata of files/dir as returned from dropbox. This is plain filesystem metadata and NOT high-level pcapi metadata for records or editors""" def __init__ (self, md): self.md = md def __str__(self): return `self.md` def mtime(self, fmt=None): """ Return last modification time of self. Args:
# -*- coding: utf-8 -*-
import connection
from pcapi import logtool

#################### Dropbox,flickr etc. credential storage management #############
""" Wrapper functions around SQL command use for reading/writing/seaching access
credentials for different providers. The result is normal a list of tuples e.g.
[(foo,bar)] for one row and (None,) for no rows.

NOTE: req_key (in dropbox lingo) is "userid" for pcapi
"""

log = logtool.getLogger("tokens", "pcapi")

def dump_tokens():
    """ Return every row of the tokens table (2D list of lists). """
    res = connection.execute(""" SELECT * FROM tokens; """)
    # 2D list of lists
    return res

def save_access_tokens(userid, req_secret, acc_key, acc_secret):
    """ Insert the credentials row for *userid* (ignored if it already exists).

    Returns True-ish when connection.execute reports success ([]).
    """
    # FIX: bind the values in the same order as the column list
    # tokens(userid, reqsec, accsec, acckey). The previous tuple passed
    # (..., acc_key, acc_secret), which stored the access KEY in the `accsec`
    # column and the access SECRET in `acckey`.
    # NOTE(review): verify that readers (e.g. get_access_pair) select these
    # columns by name and were not compensating for the swap.
    res = connection.execute(
        """ INSERT OR IGNORE INTO tokens(userid,reqsec,accsec,acckey) VALUES (?,?,?,?) """,
        (userid, req_secret, acc_secret, acc_key))
    return res == []
# Database connection wrapper try: import pysqlite2.dbapi2 as db from pysqlite2.dbapi2 import OperationalError except ImportError: import sqlite3.dbapi2 as db from sqlite3.dbapi2 import OperationalError from pcapi import config, logtool log = logtool.getLogger("connection", "pcapi") # full path of sqlite3 database DB = config.get("path", "sessionsdb") log.debug(DB) # creating/connecting the test_db. # "check_same_thread" turns off some false alarms from sqlite3. # NOTE: mod_wsgi runs these global variables in *different* processes for each request. con = db.connect(DB, check_same_thread=False) def execute(sql, args=()): """ Execute *sql* statement using list *args* for sql substitution. PC-API was meant to be fault tolerant to all disk/database faults. This function tries to handle all possible errors by first regenerating missing tables and falling back to using a memory database if all else fails. Args: sql: SQL statement
"""High level, HTTP-agnostic record access and processing""" import re, json from pcapi import logtool from pcapi.provider import filter_utils import time log = logtool.getLogger("provider", "records") class Record(object): """Class to store record bodies and metadata in memory for fast access""" def __init__(self, content, metadata): """Store content and metadata""" self.content = content # as a parsed json (dict) self.metadata = metadata # as a dbox_provider.Metadata object def convertToGeoJSON(records): """ Export all records to geojson and return result. """ # python dicts should be automatically converted to geojson by bottle # self.response.headers['Content-Type'] = 'application/json' features = [] for r in records: # log.debug(r.content) # get first -and only- value of dictionary because records are an array of
""" Export filter for several formats supported by OGR. Implemented as a singleton. It is currently implemented as wrapper around ogr2ogr to facilitate prototype and easy of use. Prerequisites: ogr2ogr installed and specified under resources/config.ini file """ import subprocess from pcapi import logtool, config LOG = logtool.getLogger("ogr", "filters") OGR2OGR = config.get("ogr", "ogr2ogr") TARGET_POSTGIS = "PG:user={USER} dbname={DATABASE} host={HOST} password={PASSWORD}" def toPostGIS(data, userid): """ Export "/data.json" to configured PostGIS database. Assumes an up-to-date data.json. Returns: JSON object with status, new tablename, message """ # If an email is used for userid we need to change `@' and `.' to something valid # for Postgres tables tablename = userid.replace('@','_at_').replace('.','_dot_') host = config.get("pg","database_host") database = config.get("pg","database_database") user = config.get("pg","database_user") password = config.get("pg","database_password") target = TARGET_POSTGIS.format( USER=user, DATABASE=database, HOST=host, PASSWORD=password ) source = data call_array = [ OGR2OGR, "-overwrite", "-update", "-f", "PostgreSQL", target, \
import tempfile
import uuid
import urllib2
import time
import zipfile
from bottle import static_file
from StringIO import StringIO

from pcapi import ogr, fs_provider, helper, logtool
from pcapi.exceptions import FsException
from pcapi.provider import Records
from pcapi.publish import postgis, geonetwork
from pcapi.utils.writers import UnicodeWriter

log = logtool.getLogger("PCAPIRest", "pcapi")

class Record(object):
    """ Class to store record bodies and metadata in memory for fast access"""
    def __init__(self, content, metadata ):
        self.content = content # as a parsed json (dict)
        self.metadata = metadata # as a dbox_provider.Metadata object

class PCAPIRest(object):
    """ REST part of the API. Return values should be direct json """
    def __init__(self, request, response):
        # bottle request/response objects for the current HTTP call
        self.request = request
        self.response = response
        # storage backend; selected per-request by code outside this chunk
        self.provider = None
        # per-request cache of Record objects
        self.rec_cache = []
# WARNING!!! # No `/' directory will be created unless you upload a file first. This is to avoid # a mess by clients calling /auth/local repeatedly and creating a new users every time without authentication. # Might revisit once we have authentication in place! import os import time import shutil import re from pcapi import logtool, config, helper from pcapi.exceptions import FsException log = logtool.getLogger("FsProvider", "pcapi") class Metadata(object): """ metadata of files/dir as returned from local filesystem. This is plain filesystem metadata and NOT high-level pcapi metadata for records or editors""" def __init__ (self, md): self.md = md def __str__(self): return `self.md` def mtime(self, fmt=None): """ Return last modification time of self. Args: fmt (optional): format (s. strftime() system call) for the output date
### Maps HTTP/REST requests to python functions ### ### They can all be tested ith wget/curl ### ####################################################### """ import bottle from bottle import route, request, response, static_file, hook ## pcapi imports from pcapi import logtool from pcapi import config from pcapi import varexport from pcapi.rest import PCAPIRest from pcapi.ows.OWS import OWSRest log = logtool.getLogger("pcapi") ### Provider capabilities ### @route('/auth/providers',method=["GET"]) def capabilities(): return PCAPIRest(request,response).capabilities() ### /export/ a public URL @route('/export/<provider>/<userid>/<path:path>', method=["GET"]) def export(userid, provider, path="/"): return PCAPIRest(request,response).export(provider, userid, path)
""" This module is responsible for the mapping between json properties and postgis SQL Data Definition and Data Manipulation Language (DDL & DML) i.e schema and data""" import json, re from pcapi import logtool log = logtool.getLogger("mapping", "pcapi.publish") def mapping(js_rec, userid): """ Takes records as json and returns and array of [<tablename>, <title>, <DDL>, <DML>] values for SQL substitution. Furthermore, it adds userid and "compulsory QA values" e.g. pos_acc """ # parse record json rec = json.loads(js_rec) # check if table exists -- defined by editor field without the ".edtr" extension # However there is an unresolved FTOpen bug where sometimes the ".edtr" is missing. if (len(rec["properties"]["editor"]) == 41): tname = rec["properties"]["editor"][:-5] else: tname = rec["properties"]["editor"] log.debug( 'Workaround -- record {0} sent without ".edtr" suffix'.format( rec["name"])) tname = whitelist_table(tname) # title is purely for making people using geoserver directly if (rec["properties"].has_key("title")):
"""Handles routes for OWS services. You can still test each service without HTTP by invoking them e.g. python ./wfs.py """ from pcapi import logtool, config, helper import WFS log = logtool.getLogger("OWS", "pcapi.ows") def xmlerror(msg, http_response): log.error(msg) http_response.content_type = 'text/xml; charset=utf-8' ogc_error = """<ServiceExceptionReport xmlns="http://www.opengis.net/ogc" \ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="1.2.0" \ xsi:schemaLocation="http://www.opengis.net/ogc \ http://schemas.opengis.net/wfs/1.0.0/OGC-exception.xsd"> <ServiceException code="InvalidParameterValue" locator="typeName">{}</ServiceException> </ServiceExceptionReport>""".format(msg) return ogc_error def OWSRest(http_request, http_response): """Handles HTTP calls and dispatches the right OWS function @param request(string):request headers @param response(string): response headers """ if not config.getboolean("ows", "enable"): return xmlerror("OWS support is disabled", http_response) params = helper.httprequest2dict(http_request)
import bottle
from bottle import route, request, response, static_file, hook

## pcapi imports
from pcapi import logtool
from pcapi import config
from pcapi import varexport
from pcapi.rest import PCAPIRest

log = logtool.getLogger("pcapi")

################ ROUTES ####################

#######################################################
### Rest Callbacks (can be tested with wget/curl) ###
#######################################################

### Provider capabilities ###
@route('/auth/providers', method=["GET"])
def capabilities():
    # GET /auth/providers -- list the storage providers this deployment supports
    return PCAPIRest(request, response).capabilities()

### /export/ a public URL
@route('/export/<provider>/<userid>/<path:path>', method=["GET"])
def export(userid, provider, path="/"):
    # GET /export/... -- serve *path* from the user's storage as a public URL.
    # NOTE(review): the route declares <provider> before <userid> while the
    # function signature is (userid, provider, ...); bottle passes URL wildcards
    # by keyword so this works, but the ordering is confusing -- verify intent.
    return PCAPIRest(request, response).export(provider, userid, path)
from bs4 import BeautifulSoup # from lxml.html.clean import clean_html, Cleaner # from lxml import etree from pcapi import logtool # from pcapi import config # from StringIO import StringIO import html5lib from html5lib import treebuilders # from html5lib import treewalkers, serializer # from html5lib.filters import sanitizer log = logtool.getLogger("FormValidator", "pcapi") class Editor(object): def __init__(self, content): self.content = content self.soup = BeautifulSoup(self.content, 'html.parser') self.elements = ["text", "textarea", "checkbox", "radio", "select", "image", "audio", "range"] def findElements(self): elements = [] for tag in self.soup.findAll("div", {"class": "fieldcontain"}): check, elem = self.checkForElements(tag["id"]) if check: log.debug("%s, %s" % (tag["id"], self.get_header(elem, tag["id"]))) elements.append([tag["id"], self.get_header(elem, tag["id"])]) return elements def checkForElements(self, tag): for el in self.elements: if el in tag and "-buttons" not in tag: