def build_hierarchy(self, geonames_id, fcodes=None):
    """
    Accepts a geonames id and fetches a hierarchy of features from
    the API, returning them as a list of geoname items.

    Arguments:
        geonames_id - GeoNames identifier of the feature
        fcodes - optional list of GeoNames feature codes (e.g. "ADM1");
                 when given, only features whose fcode matches are
                 returned.  When None, all features are returned.

    Returns a (possibly empty) list of geoname feature dicts.
    """
    hierarchy = []

    params = {"geonameId": str(geonames_id),
              "username": module_config().get("geonames_username"),
              "token": module_config().get("geonames_token")}
    url = DplaGeonamesGeocoder.base_uri + \
          "hierarchyJSON?%s" % urlencode(params)

    if (url not in DplaGeonamesGeocoder.resultCache):
        result = DplaGeonamesGeocoder._get_result(url)
        if result.get('geonames'):
            DplaGeonamesGeocoder.resultCache[url] = result["geonames"]
        else:
            return hierarchy

    # Return only the requested fcodes.  The None test must come first:
    # the previous ordering evaluated 'feature["fcode"] in fcodes'
    # before checking fcodes, raising TypeError when fcodes was omitted.
    for feature in DplaGeonamesGeocoder.resultCache.get(url):
        if (fcodes is None or
                ("fcode" in feature and feature["fcode"] in fcodes)):
            hierarchy.append(feature)

    return hierarchy
def reverse_geocode(self, lat, lng):
    """Return the GeoNames place nearest to (lat, lng), or None.

    Responses are memoized in the class-level resultCache, keyed by the
    request URL, so repeated lookups of the same point hit the network
    only once.
    """
    cache = DplaGeonamesGeocoder.resultCache
    query = urlencode({
        "lat": lat,
        "lng": lng,
        "username": module_config().get("geonames_username"),
        "token": module_config().get("geonames_token"),
    })
    url = "%sfindNearbyJSON?%s" % (DplaGeonamesGeocoder.base_uri, query)

    if url in cache:
        return cache[url]

    result = DplaGeonamesGeocoder._get_result(url)
    if "geonames" in result and len(result["geonames"]) > 0:
        # Keep only the nearest (first) match.
        cache[url] = result["geonames"][0]
        return cache[url]

    logger.error("Could not reverse geocode (%s, %s)" % (lat, lng,))
    return None
def _name_search(self, name, params=None):
    """Search GeoNames for features matching name.

    Arguments:
        name - place name to search for (UTF-8 encoded for the query)
        params - optional dict of extra query parameters; entries
                 override the defaults below.

    Returns a (possibly empty) list of geoname feature dicts; results
    are cached per request URL in DplaGeonamesGeocoder.resultCache.
    """
    defaults = {"q": name.encode("utf8"),
                "maxRows": 15,
                "username": module_config().get("geonames_username"),
                "token": module_config().get("geonames_token")}
    # Merge caller params over the defaults.  The previous
    # dict(defaults.items() + params.items()) only works on Python 2;
    # copy/update is equivalent and portable.  A None default also
    # avoids the shared mutable-default pitfall of params={}.
    merged = dict(defaults)
    if params:
        merged.update(params)

    url = DplaGeonamesGeocoder.base_uri + "searchJSON?%s" % \
        urlencode(merged)

    if (url not in DplaGeonamesGeocoder.resultCache):
        result = DplaGeonamesGeocoder._get_result(url)
        if result.get('geonames'):
            DplaGeonamesGeocoder.resultCache[url] = result["geonames"]
        else:
            return []

    return DplaGeonamesGeocoder.resultCache[url]
def update_document(document, filepath, mime, status):
    """
    Updates the document with the results of a thumbnail download.

    Arguments:
        document object - document for updating (decoded by json module)
        filepath string - filepath of the downloaded thumbnail; joined
                          with the configured thumbs_root_url
        mime string     - MIME type to record on the object
        status string   - value for the admin/object_status field

    Returns:
        The document from parameter, updated for whichever arguments
        were truthy.
    """
    if filepath:
        base_url = module_config().get('thumbs_root_url')
        obj = document["object"]
        obj["@id"] = base_url + filepath
        obj["format"] = mime
        document["object"] = obj
    elif mime:
        # No filepath: still record the MIME type on the object.  (When
        # filepath is set, format was already assigned above; the
        # previous unconditional second assignment was redundant.)
        obj = document["object"]
        obj["format"] = mime

    if status:
        setprop(document, "admin/object_status", status)

    return document
def reverse_geocode(self, lat, lng):
    """Return the GeoNames place nearest to (lat, lng), or None.

    Responses are cached per request URL in the class-level
    resultCache so repeated lookups avoid extra HTTP calls.
    """
    params = {
        "lat": lat,
        "lng": lng,
        "username": module_config().get("geonames_username"),
        "token": module_config().get("geonames_token")
    }
    url = "http://ws.geonames.net/findNearbyJSON?%s" % urlencode(params)
    if (url not in DplaGeonamesGeocoder.resultCache):
        result = json.loads(util.decode_page(urlopen(url)))
        if ("geonames" in result \
                and len(result["geonames"]) > 0):
            # Cache only the nearest (first) match.
            DplaGeonamesGeocoder.resultCache[url] = result["geonames"][0]
        else:
            logger.error("Could not reverse geocode (%s, %s)" % (lat, lng,))
            return None
    return DplaGeonamesGeocoder.resultCache[url]
def __init__(self):
    """Read the Twofishes endpoint from config and set up the default
    query parameters: English, a single interpretation, and parents /
    display name included in the response."""
    self.base_url = module_config().get('twofishes_base_url')
    self.params = dict(
        lang='en',
        responseIncludes='PARENTS,DISPLAY_NAME',
        maxInterpretations=1,
    )
def reverse_geocode_hierarchy(self, lat, lng, fcodes=None):
    """Reverse geocode (lat, lng) and return its GeoNames hierarchy.

    Arguments:
        lat, lng - coordinates to reverse geocode
        fcodes - optional list of GeoNames feature codes; when given,
                 only hierarchy members with a matching fcode are
                 returned.  When None, all members are returned.

    Returns a (possibly empty) list of geoname place dicts.
    """
    hierarchy = []

    geonames_item = self.reverse_geocode(lat, lng)
    if (geonames_item):
        params = {"geonameId": geonames_item["geonameId"],
                  "username": module_config().get("geonames_username"),
                  "token": module_config().get("geonames_token")}
        url = "http://ws.geonames.net/hierarchyJSON?%s" % urlencode(params)
        if (url not in DplaGeonamesGeocoder.resultCache):
            result = json.loads(util.decode_page(urlopen(url)))
            DplaGeonamesGeocoder.resultCache[url] = result["geonames"]

        # Return only the requested fcodes.  The None test must come
        # first: the previous ordering evaluated 'place["fcode"] in
        # fcodes' before checking fcodes, raising TypeError when
        # fcodes was omitted.
        for place in DplaGeonamesGeocoder.resultCache[url]:
            if (fcodes is None or
                    ("fcode" in place and place["fcode"] in fcodes)):
                hierarchy.append(place)

    return hierarchy
def test_environment():
    """Sanity-check akara.module_config() lookup, default fallback and
    require() behaviour against the "Akara" configuration section."""
    # assert akara.raw_config is not None
    assert akara.module_config("Akara") is not None
    assert akara.module_config("Akara")["Listen"] is not None
    assert akara.module_config("Akara").get("Listen") == akara.module_config("Akara")["Listen"]
    # .get() falls back to the supplied default for a missing key.
    assert akara.module_config("Akara").get("XYZListen", 123) == 123
    assert akara.module_config("Akara").require("Listen", "SHRDLU")
    try:
        # require() on a missing key must raise, with the description
        # string ("SHRDLU") included in the error message.
        akara.module_config("Akara").require("XYZListen", "SHRDLU")
        raise AssertionError
    except Exception, err:
        assert "SHRDLU" in str(err)
def reverse_geocode(self, lat, lng):
    """Return the GeoNames place nearest to (lat, lng), or None.

    Results are cached per request URL in the class-level resultCache.
    """
    params = {
        "lat": lat,
        "lng": lng,
        "username": module_config().get("geonames_username"),
        "token": module_config().get("geonames_token")
    }
    url = "http://ws.geonames.net/findNearbyJSON?%s" % urlencode(params)
    if (url not in DplaGeonamesGeocoder.resultCache):
        result = json.loads(util.decode_page(urlopen(url)))
        if ("geonames" in result \
                and len(result["geonames"]) > 0):
            # Cache only the nearest (first) match.
            DplaGeonamesGeocoder.resultCache[url] = result["geonames"][0]
        else:
            logger.error("geocode: Could not reverse geocode (%s, %s)" % (
                lat, lng, ))
            return None
    return DplaGeonamesGeocoder.resultCache[url]
def reverse_geocode(self, lat, lng):
    """
    Accepts latitude and longitude values and returns a geonames
    place that matches their value.

    Returns None (and logs an error) when GeoNames has no match.
    Responses are cached per request URL in resultCache.
    """
    params = {
        "lat": lat,
        "lng": lng,
        "username": module_config().get("geonames_username"),
        "token": module_config().get("geonames_token")
    }
    url = DplaGeonamesGeocoder.base_uri + \
          "findNearbyJSON?%s" % urlencode(params)
    if (url not in DplaGeonamesGeocoder.resultCache):
        result = DplaGeonamesGeocoder._get_result(url)
        if ("geonames" in result \
                and len(result["geonames"]) > 0):
            # Cache only the nearest (first) match.
            DplaGeonamesGeocoder.resultCache[url] = result["geonames"][0]
        else:
            logger.error("Could not reverse geocode (%s, %s)" % (lat, lng,))
            return None
    return DplaGeonamesGeocoder.resultCache[url]
def find_conversion_dictionary(mapping_key):
    """Finds the dictionary with values to use for conversion.

    Args:
        mapping_key (Str): Name of conversion key read from Akara.conf

    Returns:
        Dictionary used for converting values.
    """
    # The key-to-dictionary-name mapping lives in akara.conf under
    # "lookup_mapping"; the named dictionary is a module-level global.
    lookup_mapping = module_config().get('lookup_mapping')
    return globals()[lookup_mapping[mapping_key].upper()]
def find_conversion_dictionary(mapping_key): """Finds the dictionary with values to use for conversion. Args: mapping_key (Str): Name of conversion key read from Akara.conf Returns: Dictionary used for converting values. """ # Mapping should be in akara.conf mapping = module_config().get("lookup_mapping") dict_name = mapping[mapping_key].upper() return globals()[dict_name]
def find_conversion_dictionary(mapping_key): """Finds the dictionary with values to use for conversion. Args: mapping_key (Str): Name of conversion key read from Akara.conf Returns: Dictionary used for converting values. """ # Mapping should be in akara.conf mapping = module_config().get('lookup_mapping') logger.debug("Looking for mapping using key [%s]" % mapping_key) dict_name = mapping[mapping_key].upper() logger.debug("Found substitution dict [%s] for key mapping [%s]" % (dict_name, mapping_key,)) return globals()[dict_name]
def find_conversion_dictionary(mapping_key): """Finds the dictionary with values to use for conversion. Args: mapping_key (Str): Name of conversion key read from Akara.conf Returns: Dictionary used for converting values. """ # Mapping should be in akara.conf mapping = module_config().get('lookup_mapping') logger.debug("Looking for mapping using key [%s]" % mapping_key) dict_name = mapping[mapping_key].upper() logger.debug("Found substitution dict [%s] for key mapping [%s]" % ( dict_name, mapping_key, )) return globals()[dict_name]
from akara import logger, response, module_config from akara.services import simple_service from amara.thirdparty import json import dplaingestion.itemtype as itemtype type_for_type_keyword = \ module_config('enrich_type').get('type_for_ot_keyword') type_for_format_keyword = \ module_config('enrich_type').get('type_for_phys_keyword') @simple_service('POST', 'http://purl.org/la/dp/enrich-type', 'enrich-type', 'application/json') def enrichtype(body, ctype, action="enrich-type", prop="sourceResource/type", format_field="sourceResource/format", default=None, send_rejects_to_format=False): """ Service that accepts a JSON document and enriches the "type" field of that document by: By default works on the 'type' field, but can be overridden by passing the name of the field to use as a parameter. A default type, if none can be determined, may be specified with the "default" querystring parameter. If no default is given, the type field will be unmodified, or not added, in the result. """
''' import amara from amara.xslt import transform from amara.xpath.util import simplify from amara.bindery import html from amara.lib import irihelpers, inputsource import akara from akara.services import simple_service from akara import response XSLT_SERVICE_ID = 'http://purl.org/akara/services/demo/xslt' XPATH_SERVICE_ID = 'http://purl.org/akara/services/demo/xpath' DEFAULT_TRANSFORM = akara.module_config().get('default_transform') URI_SPACE = akara.module_config().get('uri_space', 'http://github.com/zepheira').split() #print DEFAULT_TRANSFORM #FIXME! The core URI auth code is tested, but not the use in this module if URI_SPACE == '*': #Allow all URI access ALLOWED = [(True, True)] else: ALLOWED = [] for baseuri in URI_SPACE: #dAd a rule that permits URIs starting with this URISPACE item #FIXME: Technically should normalize uri and base, but this will work for most cases ALLOWED.append((lambda uri, base=baseuri: uri.startswith(base), True))
from amara.lib.iri import split_fragment, split_uri_ref, unsplit_uri_ref, split_authority #from amara import inputsource # Akara Imports from akara import module_config, logger, response from akara.util import multipart_post_handler, wsgibase, http_method_handler, copy_headers_to_dict from akara.services import method_dispatcher from akara.util import status_response, read_http_body_to_temp from akara.util import BadTargetError, HTTPAuthorizationError, MoinAuthorizationError, UnexpectedResponseError, MoinMustAuthenticateError, MoinNotFoundError, ContentLengthRequiredError, GenericClientError, ConflictError import akara.util.moin as moin # ====================================================================== # Module Configruation # ====================================================================== TARGET_WIKIS = module_config().get("targets", {}) TARGET_WIKI_OPENERS = {} DEFAULT_OPENER = urllib2.build_opener( urllib2.HTTPCookieProcessor(), multipart_post_handler.MultipartPostHandler) # Specifies the default max-age of Moin pages CACHE_MAX_AGE = module_config().get("CACHE_MAX_AGE", None) # Specifies a Wiki path (currently only one, FIXME) under which no caching will occur NO_CACHE_PATHS = module_config().get("NO_CACHE_PATHS", None) # Look at each Wiki URL and build an appropriate opener object for retrieving # pages. If the URL includes HTTP authentication information such as # http://user:[email protected]/mywiki, the opener is built with # basic authentication enabled. For details, see:
from akara.services import simple_service
from akara import request, response
from akara import module_config, logger
from akara.util import copy_headers_to_dict
from amara.thirdparty import json, httplib2
from amara.lib.iri import join
from urllib import quote
import datetime
import uuid
import base64

# CouchDB connection settings, from akara.conf.
COUCH_DATABASE = module_config().get('couch_database')
COUCH_DATABASE_USERNAME = module_config().get('couch_database_username')
COUCH_DATABASE_PASSWORD = module_config().get('couch_database_password')

COUCH_ID_BUILDER = lambda src, lname: "--".join((src,lname))
# Set id to value of the first identifier, disambiguated w source. Not sure if
# an OAI handle is guaranteed or on what scale it's unique.
# FIXME it's looking like an id builder needs to be part of the profile. Or UUID as fallback?
COUCH_REC_ID_BUILDER = lambda src, rec: COUCH_ID_BUILDER(src,rec.get(u'id','no-id').strip().replace(" ","__"))

# NOTE(review): base64.encodestring appends a trailing newline to the
# encoded credentials, and this raises TypeError at import time if the
# username/password config keys are missing -- confirm both are always set.
COUCH_AUTH_HEADER = { 'Authorization' : 'Basic ' + base64.encodestring(COUCH_DATABASE_USERNAME+":"+COUCH_DATABASE_PASSWORD) }
# FIXME: this should be JSON-LD, but CouchDB doesn't support +json yet
CT_JSON = {'Content-Type': 'application/json'}

H = httplib2.Http()
# Report HTTP errors as status codes rather than raising exceptions.
H.force_exception_as_status_code = True

# FIXME: should support changing media type in a pipeline
from akara import logger, response, module_config from akara.services import simple_service from amara.thirdparty import json from dplaingestion.selector import delprop, getprop, setprop, exists import dplaingestion.itemtype as itemtype import re type_for_type_keyword = \ module_config('enrich_type').get('type_for_ot_keyword') type_for_format_keyword = \ module_config('enrich_type').get('type_for_phys_keyword') @simple_service('POST', 'http://purl.org/la/dp/enrich-type', 'enrich-type', 'application/json') def enrichtype(body, ctype, action="enrich-type", prop="sourceResource/type", format_field="sourceResource/format", default=None, send_rejects_to_format=False): """ Service that accepts a JSON document and enriches the "type" field of that document by: By default works on the 'type' field, but can be overridden by passing the name of the field to use as a parameter. A default type, if none can be determined, may be specified with the "default" querystring parameter. If no default is given, the type field will be unmodified, or not added, in the result. """
# This is a very simple implementation of conditional GET with # the Last-Modified header. It makes media files a bit speedier # because the files are only read off disk for the first request # (assuming the browser/client supports conditional GET). mtime = formatdate(os.stat(filename).st_mtime, usegmt=True) headers = [('Last-Modified', mtime)] if environ.get('HTTP_IF_MODIFIED_SINCE', None) == mtime: status = '304 Not Modified' output = () else: status = '200 OK' mime_type = mimetypes.guess_type(filename)[0] if mime_type: headers.append(('Content-Type', mime_type)) output = [fp.read()] fp.close() start_response(status, headers) return output import akara if not akara.module_config(): akara.logger.warn("No configuration section found for %r" % (__name__, )) paths = akara.module_config().get("paths", {}) for path, root in paths.items(): handler = MediaHandler(root) registry.register_service(SERVICE_ID, path, handler)
from itertools import dropwhile import amara from amara import bindery from amara.tools import atomtools from amara.thirdparty import httplib2 from amara.lib.util import first_item from amara.thirdparty import json from akara.services import simple_service from akara import request, response from akara import logger, module_config # These come from the akara.demos.atomtools section of the Akara configuration file ENTRIES = module_config().warn("entries", "/path/to/entry/files/*.atom", "glob path to Atom entries") FEED_ENVELOPE = module_config().warn("feed_envelope", '''<feed xmlns="http://www.w3.org/2005/Atom"> <title>This is my feed</title><id>http://example.com/my_feed</id> </feed>''', "XML envelope around the Atom entries") #text/uri-list from RFC 2483 SERVICE_ID = 'http://purl.org/akara/services/demo/atom.json' @simple_service('GET', SERVICE_ID, 'akara.atom.json', 'application/json') def atom_json(url): ''' Convert Atom syntax to Exhibit JSON (see: http://www.ibm.com/developerworks/web/library/wa-realweb6/ ; this is based on listing 3)
VALUE_SET_TYPE = 'value_set' VARIABLE_LABELS_TYPE = 'variable_labels' VALUE_LABELS_TYPE = 'value_labels' #R_SCRIPT = '''library(foreign) #mydata <- read.spss(file='%s') #write.csv2(mydata) #''' R_SCRIPT = '''library(Hmisc) mydata <- spss.get(file='%s') write.csv2(mydata) ''' R_FILE_CMD = akara.module_config(__name__).get('r_command', 'r') POR_REQUIRED = _("The 'POR' POST parameter is mandatory.") SERVICE_ID = 'http://purl.org/akara/services/demo/spss.json' @simple_service('POST', SERVICE_ID, 'spss.json', 'application/json') def spss2json(body, ctype, **params): ''' Uses GNU R to convert SPSS to JSON Optionally tries to guess long labels from an original .SPS file Requires POST body of multipart/form-data Sample request: curl -F "[email protected]" http://localhost:8880/spss.json curl -F "[email protected]" -F "[email protected]" http://localhost:8880/spss.json
""" __author__ = 'aleksey' import re from akara import logger from akara import response from akara.services import simple_service from amara.thirdparty import json from akara import module_config from dplaingestion import selector IGNORE = module_config().get('IGNORE') PENDING = module_config().get('PENDING') @simple_service('POST', 'http://purl.org/la/dp/artstor_identify_object', 'artstor_identify_object', 'application/json') def artstor_identify_object(body, ctype, download="True"): try: data = json.loads(body) except: response.code = 500 response.add_header('content-type', 'text/plain') return "Unable to parse body as JSON" original_document_key = u"originalRecord" original_sources_key = u"handle"
def geocode(body, ctype, prop="sourceResource/spatial", newprop='coordinates'):
    '''
    Adds geocode data to the record coming in as follows:
    1. Attempt to get a lat/lng coordinate from the property. We use Bing to
       lookup lat/lng from a string as it is much better than Geonames.
    2. For the given lat/lng coordinate, attempt to determine its parent
       features (county, state, country). We use Geonames to reverse geocode
       the lat/lng point and retrieve the location hierarchy.
    '''
    # logger.debug("Received: " + body)
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if (not exists(data, prop)):
        # logger.warn("geocode: COULD NOT FIND %s" % prop)
        pass
    else:
        logger.debug("Geocoding %s" % data["_id"])
        value = getprop(data, prop)
        for v in iterify(value):
            # Skip spatial entries without a "name" key (the sibling
            # implementation of this service already does); previously
            # this raised KeyError on such records.
            if "name" not in v:
                continue

            # Look up coordinates once (previously coordinate() was
            # called twice on every hit) and fall back to Bing.
            result = coordinate(v["name"])
            if not result:
                # Attempt to find this item's lat/lng coordinates
                result = DplaBingGeocoder(api_key=module_config().get(
                    "bing_api_key")).geocode_spatial(v)

            if (result):
                lat, lng = result
                v[newprop] = "%s, %s" % (lat, lng,)

                # Reverse-geocode this location to find country, state, and
                # county parent places
                hierarchy = DplaGeonamesGeocoder().reverse_geocode_hierarchy(
                    lat, lng,
                    ["PCLI",   # Country
                     "ADM1",   # State
                     "ADM2"])  # County
                for place in hierarchy:
                    fcode = place["fcode"]
                    if ("PCLI" == fcode):
                        v["country"] = place["toponymName"]
                    elif ("ADM1" == fcode):
                        v["state"] = place["toponymName"]
                    elif ("ADM2" == fcode):
                        v["county"] = place["toponymName"]

                    # Determine how close we are to the original coordinates,
                    # to see if this is the place that was geocoded and we
                    # should stop adding more specificity (e.g. if the record
                    # specified "South Carolina", we don't want to include the
                    # county that is located at the coordinates of South
                    # Carolina).  We use increasing tolerance levels to
                    # account for differences between Bing and Geonames
                    # coordinates.
                    d = haversine((lat, lng), (place["lat"], place["lng"]))
                    if (("PCLI" == fcode and d < 50)  # Country tolerance (Bing/Geonames 49.9km off)
                            or ("ADM1" == fcode and d < 15)):  # State tolerance
                        break
            else:
                logger.debug("geocode: No result found for %s" % v)

    return json.dumps(data)
from zen import spreadsheet as spreadsheetlib
from zen.temporal import smart_parse_date
from zen.csvhelper import readcsv
from zen.mods import mods2json#, MODS_NAMESPACE
#from zen.akamod import geolookup_service
from zen.whatfile import guess_imt
from zen.feeds import webfeed
from zen.exhibit import UNSUPPORTED_IN_EXHIBITKEY
from zen import ejsonify
from . import __version__ as VERSION

# Number of records per chunk when batching work.
CHUNKCOUNT = 10

# External command for BibTeX -> MODS conversion, overridable in akara.conf.
BIBTEX2MODS = module_config().get('bib2xml_command', 'bib2xml')
# When True, extra diagnostic output is produced during data load.
DIAGNOSTICS = module_config().get('dataload_diagnostics', False)

# Internet media types recognized by the loader.
BIBTEX_IMT = ["application/x-bibtex", "text/x-bibtex"]
GENERIC_BINARY_IMT = 'application/octet-stream'
UNKNOWN_IMT = 'application/unknown'
UNKNOWN_TEXT_IMT = 'text/unknown'
EXCEL_IMTS = ['application/vnd.ms-excel', 'application/vnd.ms-office', 'application/msword', GENERIC_BINARY_IMT]

#FIXME: Will grow monotonically. Switch to LRU algo
#CACHE = {}

# Property keys reserved by Exhibit and unavailable to data properties.
EXHIBIT_RESERVED = ['label', 'id', 'type']
MODS_NAMESPACE = 'http://www.loc.gov/mods/v3'
def geocode(body, ctype, prop="sourceResource/spatial", newprop='coordinates'):
    '''
    Adds geocode data to the record coming as follows:
    1. If the coordinates property does not exist, attempt to extract it from
    name.
    2. Run GeoNames enrichment, reverse encoding coordinate values to
    identify, parent features, or (if none exist) searching for name values.
    Put parent features in appropriate state/country values.
    3. If we still haven't identified the place, use Bing to get lat/long
    values. If one is found, pass the coordinates through Geonames again to
    identify parent features.
    4. Add any non-existing features to the spatial dictionary.
    '''
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type','text/plain')
        return "Unable to parse body as JSON"

    if (not exists(data, prop)):
        pass
    else:
        logger.debug("Geocoding %s" % data["_id"])
        value = getprop(data, prop)
        places = []
        for v in iterify(value):
            bing_geocode = True
            if not isinstance(v, dict):
                logger.error("Spatial value must be a dictionary; record %s"
                             % data["_id"])
                continue

            place = Place(v)
            if place.name:
                coords = get_coordinates(place.name)
                if coords:
                    # The "name" was really a coordinate string; move it
                    # into coordinates and rebuild the display name.
                    place.coordinates = coords
                    place.name = None
                    place.set_name()

            # Run Geonames enrichment to do initial search
            place.enrich_geodata(DplaGeonamesGeocoder())

            # Don't enrich with geodata if place is 'United States'
            pattern = ur" *(United States(?!-)|États-Unis|USA)"
            if (place.name and re.search(pattern, place.name)):
                bing_geocode = False

            if bing_geocode:
                # Attempt to find this item's lat/lng coordinates
                if not place.coordinates:
                    api_key = module_config().get("bing_api_key")
                    place.enrich_geodata(DplaBingGeocoder(api_key=api_key))
                    # rerun geonames enrichment with new coordinates
                    place.enrich_geodata(DplaGeonamesGeocoder())

            if not place.validate():
                if not place.set_name():
                    logger.error("Spatial dictionary must have a " +
                                 "'name' property. Could not enhance input " +
                                 "data to include a name property; " +
                                 "record %s" % data["_id"])
            places.append(place)

        # Merge related places, convert back to plain dicts and store.
        values = map(lambda x: x.to_map_json(), Place.merge_related(places))
        setprop(data, prop, values)

    return json.dumps(data)
# Make sure the environment is set up @simple_service("GET", "http://example.com/test") def test_environment(): # assert akara.raw_config is not None assert akara.module_config("Akara") is not None assert akara.module_config("Akara")["Listen"] is not None assert akara.module_config("Akara").get("Listen") == akara.module_config("Akara")["Listen"] assert akara.module_config("Akara").get("XYZListen", 123) == 123 assert akara.module_config("Akara").require("Listen", "SHRDLU") try: akara.module_config("Akara").require("XYZListen", "SHRDLU") raise AssertionError except Exception, err: assert "SHRDLU" in str(err) assert not akara.module_config("XYZAkara") # should return False assert akara.module_config("Akara") # should return True # simple services can access the WSGI environ this way assert request.environ is not None # Here's how to override the response fields assert response.code.startswith("200 ") assert response.headers == [] return "Good!" # Make sure the simple_service can specify the path # (If not given, uses the function's name) @simple_service("GET", "http://example.com/test", path="test.new.path") def test_set_path():
def geocode(body, ctype, prop="sourceResource/spatial", newprop='coordinates'):
    '''
    Adds geocode data to the record coming as follows:
    1. If the coordinates property does not exist, attempt to extract it from
    name.
    2. Run GeoNames enrichment, reverse encoding coordinate values to
    identify, parent features, or (if none exist) searching for name values.
    Put parent features in appropriate state/country values.
    3. If we still haven't identified the place, use Bing to get lat/long
    values. If one is found, pass the coordinates through Geonames again to
    identify parent features.
    4. Add any non-existing features to the spatial dictionary.
    '''
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if (not exists(data, prop)):
        pass
    else:
        logger.debug("Geocoding %s" % data["_id"])
        value = getprop(data, prop)
        places = []
        for v in iterify(value):
            bing_geocode = True
            if not isinstance(v, dict):
                logger.error("Spatial value must be a dictionary; record %s"
                             % data["_id"])
                continue

            place = Place(v)
            if place.name:
                coords = get_coordinates(place.name)
                if coords:
                    # The "name" was really a coordinate string; move it
                    # into coordinates and rebuild the display name.
                    place.coordinates = coords
                    place.name = None
                    place.set_name()

            # Run Geonames enrichment to do initial search
            place.enrich_geodata(DplaGeonamesGeocoder())

            # Don't enrich with geodata if place is 'United States'
            pattern = ur" *(United States(?!-)|États-Unis|USA)"
            if (place.name and re.search(pattern, place.name)):
                bing_geocode = False

            if bing_geocode:
                # Attempt to find this item's lat/lng coordinates
                if not place.coordinates:
                    api_key = module_config().get("bing_api_key")
                    place.enrich_geodata(DplaBingGeocoder(api_key=api_key))
                    # rerun geonames enrichment with new coordinates
                    place.enrich_geodata(DplaGeonamesGeocoder())

            if not place.validate():
                if not place.set_name():
                    logger.error("Spatial dictionary must have a " +
                                 "'name' property. Could not enhance input " +
                                 "data to include a name property; " +
                                 "record %s" % data["_id"])
            places.append(place)

        # Merge related places, convert back to plain dicts and store.
        values = map(lambda x: x.to_map_json(), Place.merge_related(places))
        setprop(data, prop, values)

    return json.dumps(data)
def geocode(body, ctype, prop="sourceResource/spatial", newprop='coordinates'):
    '''
    Adds geocode data to the record coming in as follows:
    1. Attempt to get a lat/lng coordinate from the property. We use Bing to
       lookup lat/lng from a string as it is much better than Geonames.
    2. For the given lat/lng coordinate, attempt to determine its parent
       features (county, state, country). We use Geonames to reverse geocode
       the lat/lng point and retrieve the location hierarchy.
    '''
    try:
        data = json.loads(body)
    except:
        response.code = 500
        response.add_header('content-type','text/plain')
        return "Unable to parse body as JSON"

    if (not exists(data, prop)):
        pass
    else:
        logger.debug("Geocoding %s" % data["_id"])
        value = getprop(data, prop)
        for v in iterify(value):
            if "name" not in v:
                continue

            # Look up coordinates once (previously coordinate() was
            # called twice on every hit) and fall back to Bing.
            result = coordinate(v["name"])
            if not result:
                # Attempt to find this item's lat/lng coordinates
                result = DplaBingGeocoder(api_key=module_config().get("bing_api_key")).geocode_spatial(v)

            if (result):
                lat, lng = result
                v[newprop] = "%s, %s" % (lat, lng,)

                # Reverse-geocode this location to find country, state, and county parent places
                hierarchy = DplaGeonamesGeocoder().reverse_geocode_hierarchy(
                    lat, lng,
                    ["PCLI",   # Country
                     "ADM1",   # State
                     "ADM2"])  # County
                for place in hierarchy:
                    fcode = place["fcode"]
                    if ("PCLI" == fcode):
                        v["country"] = place["toponymName"]
                    elif ("ADM1" == fcode):
                        v["state"] = place["toponymName"]
                    elif ("ADM2" == fcode):
                        v["county"] = place["toponymName"]

                    # Determine how close we are to the original coordinates,
                    # to see if this is the place that was geocoded and we
                    # should stop adding more specificity (e.g. if the record
                    # specified "South Carolina", we don't want to include the
                    # county that is located at the coordinates of South
                    # Carolina).  We use increasing tolerance levels to
                    # account for differences between Bing and Geonames
                    # coordinates.
                    d = haversine((lat, lng), (place["lat"], place["lng"]))
                    if (("PCLI" == fcode and d < 50)  # Country tolerance (Bing/Geonames 49.9km off)
                            or ("ADM1" == fcode and d < 15)):  # State tolerance
                        break
            else:
                logger.debug("No geocode result found for %s" % v)

    return json.dumps(data)
from akara.caching import cache, make_named_cache

from zen import spreadsheet as spreadsheetlib
from zen.temporal import smart_parse_date
from zen.csvhelper import readcsv
from zen.mods import mods2json#, MODS_NAMESPACE
from zen.geo import geolookup
from zen.whatfile import guess_imt
from zen.feeds import webfeed
from zen.exhibit import UNSUPPORTED_IN_EXHIBITKEY
from . import __version__ as VERSION

# Number of records per chunk when batching work.
CHUNKCOUNT = 10

# External command for BibTeX -> MODS conversion, overridable in akara.conf.
BIBTEX2MODS = module_config().get('bib2xml_command', 'bib2xml')
# When True, extra diagnostic output is produced during data load.
DIAGNOSTICS = module_config().get('dataload_diagnostics', False)

# Internet media types recognized by the loader.
BIBTEX_IMT = ["application/x-bibtex", "text/x-bibtex"]
GENERIC_BINARY_IMT = 'application/octet-stream'
UNKNOWN_IMT = 'application/unknown'
UNKNOWN_TEXT_IMT = 'text/unknown'
EXCEL_IMTS = ['application/vnd.ms-excel', 'application/vnd.ms-office', 'application/msword', GENERIC_BINARY_IMT]

#FIXME: Will grow monotonically. Switch to LRU algo
#CACHE = {}

# Property keys reserved by Exhibit and unavailable to data properties.
EXHIBIT_RESERVED = ['label', 'id', 'type']
MODS_NAMESPACE = 'http://www.loc.gov/mods/v3'
import urllib, urllib2, urlparse from subprocess import * import cgi from cStringIO import StringIO from itertools import * from contextlib import closing from amara import _ from amara.lib.util import * import akara from akara.util import copy_auth from akara.services import simple_service Q_REQUIRED = _("The 'q' POST parameter is mandatory.") SVN_COMMIT_CMD = akara.module_config().get('svn_commit', 'svn commit -m "%(msg)s" %(fpath)s') SVN_ADD_CMD = akara.module_config().get('svn_add', 'svn add %(fpath)s') SERVICE_ID = 'http://purl.org/akara/services/demo/svncommit' @simple_service('POST', SERVICE_ID, 'akara.svncommit', 'text/plain') def svncommit(body, ctype, **params): '''Commit a file. Can optionally populate the file contents from a given URL. The form parameters are: fpath - the name of the file to commit to SVN msg - the commit message q (optional) - fetch the given URL and save it to the specified file before commmiting The form must be POSTed as multipart/form-data. If the request includes the 'q' parameter then the new fetch will contain authentication forward
VALUE_SET_TYPE = 'value_set' VARIABLE_LABELS_TYPE = 'variable_labels' VALUE_LABELS_TYPE = 'value_labels' #R_SCRIPT = '''library(foreign) #mydata <- read.spss(file='%s') #write.csv2(mydata) #''' R_SCRIPT = '''library(Hmisc) mydata <- spss.get(file='%s') write.csv2(mydata) ''' R_FILE_CMD = akara.module_config(__name__).get('r_command', 'r') POR_REQUIRED = _("The 'POR' POST parameter is mandatory.") SERVICE_ID = 'http://purl.org/akara/services/demo/spss.json' @simple_service('POST', SERVICE_ID, 'spss.json', 'application/json') def spss2json(body, ctype, **params): ''' Uses GNU R to convert SPSS to JSON Optionally tries to guess long labels from an original .SPS file Requires POST body of multipart/form-data Sample request:
from contextlib import closing

from amara.thirdparty import json, httplib2

from akara.services import simple_service
from akara import logger
from akara import response
from akara.caching import cache, make_named_cache
from akara import module_config

from zen.services import service_proxy
from zen import augmentation
from zen.akamod import geolookup_service
#from zen.geo import local_geonames, US_STATE_FIRST

# Path of the local geonames database file, from akara.conf.
GEONAMES_PLUS_DBFILE = module_config().get('geonames_dbfile')

# Number of records per chunk when batching work.
CHUNKCOUNT = 10

# Matches characters not allowed in Exhibit property keys.
UNSUPPORTED_IN_EXHIBITKEY = re.compile('\W')

# Property keys reserved by Exhibit and unavailable to data properties.
EXHIBIT_RESERVED = ['label', 'id', 'type']

#GEOLOOKUP_CACHE = cache('http://purl.org/com/zepheira/services/geolookup.json', expires=24*60*60)


def post(body, sink):
    """POST body to the sink URL as JSON.

    Returns the (response, content) pair from httplib2.
    """
    headers = {'Content-type': 'application/json'}
    h = httplib2.Http()
    resp, content = h.request(sink, "POST", body=body, headers=headers)
    return resp, content
import os

from dplaingestion.mappers.oac_dc_mapper import OAC_DCMapper
from dplaingestion.selector import exists, getprop
from dplaingestion.utilities import iterify
from akara import module_config

# Base URL for OAC content; akara.conf takes precedence over the
# URL_OAC_CONTENT_BASE environment variable, with a hard-coded default.
URL_OAC_CONTENT_BASE = module_config().get(
    'url_oac_content',
    os.environ.get('URL_OAC_CONTENT_BASE', 'http://content.cdlib.org')
)


class OAC_DCMapperSuppressPublisher(OAC_DCMapper):
    '''Mapper for OAC xml feed with bogus publisher fields'''
    # sourceResource mapping
    def map_publisher(self):
        # Intentionally a no-op: overrides the base mapper so the
        # publisher field is never mapped for this feed.
        pass
from akara.util import find_peer_service, extract_auth, read_http_body_to_temp, copy_headers_to_dict
from akara.util import status_response
from akara.util.moin import ORIG_BASE_HEADER, DOCBOOK_IMT, RDF_IMT, HTML_IMT, XML_IMT
#from akara.services import simple_service
from akara.services import method_dispatcher
from akara import request, logger, module_config
from akara.opensearch import apply_template
from zen.util import requested_imt, requested_lang
from zen import ZEN_SERVICE_ID
#import zenlib.moinmodel
#from zenlib.moinmodel import node, rulesheet, moinrest_resolver, parse_moin_xml, zenuri_to_moinrest, MOINREST_SERVICE_ID
#from zenlib.util import find_peer_service

# Shared secret authorizing rulesheet operations; empty string by default.
SECRET = module_config().get('RULESHEET_SECRET', '')
# The SPACES config entry is callable; invoke it to get the definitions.
SPACESDEF = module_config()['SPACES']()
SPACES = {}
# Matches characters that are not safe to use in a filename.
# NOTE(review): `re` and `httplib2` are referenced below but not imported
# in this visible chunk; presumably imported earlier in the file -- confirm.
UNSUPPORTED_IN_FILENAME = re.compile('\W')
DEFAULT_MOUNT = 'zen'
# Shared HTTP client backed by an on-disk cache.
H = httplib2.Http('/tmp/.cache')
FIRST_REQUEST_FLAG = False

#def module_load():
FIND_PEER_SERVICE_KEY = 'akara.FIND_PEER_SERVICE'

def setup_request(environ):
import os

from akara import logger
from akara import response
from akara.services import simple_service
from amara.thirdparty import json
from dplaingestion.selector import getprop, setprop, exists
from akara import module_config

# Config-driven values consulted by this module; their semantics are
# defined by the deployment configuration, not visible in this chunk.
IGNORE = module_config().get("IGNORE")
PENDING = module_config().get("PENDING")


@simple_service(
    "POST",
    "http://purl.org/la/dp/kentucky_identify_object",
    "kentucky_identify_object",
    "application/json"
)
def kentucky_identify_object(body, ctype, download="True"):
    """
    Responsible for: adding a field to a document with the URL where we
    should expect to the find the thumbnail
    """
    data = {}
    try:
        data = json.loads(body)
    except:
        # Unparseable request body -> 500 with a plain-text explanation.
        response.code = 500
        response.add_header("content-type", "text/plain")
        return "Unable to parse body as JSON"

    relation_field = "sourceResource/relation"
    if exists(data, relation_field):
        url = getprop(data, relation_field)
import amara
from amara.thirdparty import httplib2
import akara
from akara.services import simple_service
from akara import response
from akara import logger
from akara.util import normalize_http_header_name

import calendar
import email
import email.Utils
import time

# Per-key max-age table from module config; a None key acts as the
# wildcard default (values presumably in seconds -- see 3600 below).
# Fall back to an empty dict when the 'maxlen' setting is absent so the
# `None in MAXLEN` membership test cannot raise TypeError on an
# unconfigured deployment (dict.get returns None otherwise).
MAXLEN = akara.module_config().get('maxlen') or {}
if None in MAXLEN:
    # Extract the wildcard entry and use it as the default lifetime.
    DEFAULT_MAXLEN = MAXLEN[None]
    del MAXLEN[None]
else:
    DEFAULT_MAXLEN = 3600

# When true-ish, stale cache entries may be served rather than refetched.
OVERRIDE_STALE = akara.module_config().get('override_stale', 0)

CACHE_PROXY_SERVICE_ID = 'http://purl.org/xml3k/akara/services/demo/cache-proxy'

# Builds a Cache-Control header pair for the given max-age in seconds.
MAXAGE_HEADER = lambda age: ('Cache-Control','max-age={0}'.format(age))

#FIXME: recycle after N uses
H = httplib2.Http()
None
'''

import amara
#from amara.bindery import html
from amara.thirdparty import json, httplib2
from amara.lib import irihelpers, inputsource
from akara.services import simple_service
from akara import request, response, logger, module_config

TOCOUCH_SERVICE_ID = 'http://purl.org/akara/services/demo/tocouch'

# Shared HTTP client backed by an on-disk cache.
H = httplib2.Http('/tmp/.cache')

# Target CouchDB database URL, overridable via module config.
COUCHBASE = module_config().get('couchbase', 'http://sforza.ogbuji.net:5984/famulus/')

@simple_service('GET', TOCOUCH_SERVICE_ID, 'tocouch', 'text/html')
def tocouch(**params):
    '''
    @xslt - URL to the XSLT transform to be applied
    all other query parameters are passed to the XSLT processor as
    top-level params

    Sample request:
    curl --request POST --data-binary "@foo.xml" --header "Content-Type: application/xml" "http://*****:*****@xslt=http://hg.akara.info/amara/trunk/raw-file/tip/demo/data/identity.xslt"

    You can check after the fact by visiting
    http://sforza.ogbuji.net:5984/test1/_all_docs
    Then get the id and surf there
    http://sforza.ogbuji.net:5984/test1/b10d978ced600227e663d6503b1abec4
import amara
from amara.thirdparty import httplib2
import akara
from akara.services import simple_service
from akara import response
from akara import logger
from akara.util import normalize_http_header_name

import calendar
import email
import email.Utils
import time

# Per-key max-age table from module config; a None key acts as the
# wildcard default (values presumably in seconds -- see 3600 below).
# Fall back to an empty dict when the 'maxlen' setting is absent so the
# `None in MAXLEN` membership test cannot raise TypeError on an
# unconfigured deployment (dict.get returns None otherwise).
MAXLEN = akara.module_config().get('maxlen') or {}
if None in MAXLEN:
    # Extract the wildcard entry and use it as the default lifetime.
    DEFAULT_MAXLEN = MAXLEN[None]
    del MAXLEN[None]
else:
    DEFAULT_MAXLEN = 3600

# When true-ish, stale cache entries may be served rather than refetched.
OVERRIDE_STALE = akara.module_config().get('override_stale', 0)

CACHE_PROXY_SERVICE_ID = 'http://purl.org/xml3k/akara/services/demo/cache-proxy'

# Builds a Cache-Control header pair for the given max-age in seconds.
MAXAGE_HEADER = lambda age: ('Cache-Control', 'max-age={0}'.format(age))

#FIXME: recycle after N uses
H = httplib2.Http()
from akara.services import simple_service
from akara import request, response
from akara import module_config, logger
from akara.util import copy_headers_to_dict
from amara.thirdparty import json, httplib2
from amara.lib.iri import join
import uuid

COUCH_DATABASE = module_config().get('couch_database')

# FIXME: this should be JSON-LD, but CouchDB doesn't support +json yet
CT_JSON = {'Content-Type': 'application/json'}

H = httplib2.Http()
H.force_exception_as_status_code = True

# FIXME: should support changing media type in a pipeline
def pipe(content, ctype, enrichments, wsgi_header):
    """
    Push `content` (JSON-serializable) through each enrichment service URI
    in `enrichments`, feeding each step's output into the next.

    Steps that fail (non-2xx status) are logged and skipped, leaving the
    payload from the previous successful step intact.  Returns the final
    serialized body.
    """
    payload = json.dumps(content)
    for service_uri in enrichments:
        if len(service_uri) < 1:
            # Empty slot: no pipeline step configured here.
            continue
        fwd_headers = copy_headers_to_dict(request.environ, exclude=[wsgi_header])
        fwd_headers['content-type'] = ctype
        resp, cont = H.request(service_uri, 'POST', body=payload, headers=fwd_headers)
        if str(resp.status).startswith('2'):
            payload = cont
        else:
            logger.debug("Error in enrichment pipeline at %s: %s" % (service_uri, repr(resp)))
    return payload
from akara import logger
from akara import response
from akara.services import simple_service
from amara.thirdparty import json
from dplaingestion.selector import getprop, setprop, exists
from akara import module_config
from amara.lib.iri import is_absolute

# Config-driven values consulted by this module; their semantics are
# defined by the deployment configuration, not visible in this chunk.
IGNORE = module_config().get('IGNORE')
PENDING = module_config().get('PENDING')


@simple_service('POST', 'http://purl.org/la/dp/contentdm_identify_object',
                'contentdm_identify_object', 'application/json')
def contentdm_identify_object(body, ctype, download="True"):
    """
    Responsible for: adding a field to a document with the URL where we
    should expect to the find the thumbnail.

    There are two methods of creating the thumbnail URL:

    1. Replacing "cdm/ref" with "utils/getthumbail" in the handle field

       Example:
         handle: http://test.provider/cdm/ref/collection/1/id/1
         thumbnail: http://test.provider/utils/getthumbnail/collection/1/id/1

    2. Splitting the handle field on "u?" and using the parts to compose
       the thumbnail URL.

       Example:
         handle: http://test.provider/u?/ctm,101
         thumbnail: http://test.provider/cgi-bin/thumbnail.exe?CISOROOT=/ctm&CISOPTR=101"
    """
from amara.lib.iri import split_fragment, split_uri_ref, unsplit_uri_ref, split_authority #from amara import inputsource # Akara Imports from akara import module_config, logger, response from akara.util import multipart_post_handler, wsgibase, http_method_handler, copy_headers_to_dict from akara.services import method_dispatcher from akara.util import status_response, read_http_body_to_temp from akara.util import BadTargetError, HTTPAuthorizationError, MoinAuthorizationError, UnexpectedResponseError, MoinMustAuthenticateError, MoinNotFoundError, ContentLengthRequiredError, GenericClientError, ConflictError import akara.util.moin as moin # ====================================================================== # Module Configruation # ====================================================================== TARGET_WIKIS = module_config().get("targets", {}) TARGET_WIKI_OPENERS = {} DEFAULT_OPENER = urllib2.build_opener( urllib2.HTTPCookieProcessor(), multipart_post_handler.MultipartPostHandler) # Specifies the default max-age of Moin pages CACHE_MAX_AGE = module_config().get("CACHE_MAX_AGE", None) # Specifies a Wiki path (currently only one, FIXME) under which no caching will occur NO_CACHE_PATHS = module_config().get("NO_CACHE_PATHS", None) # Look at each Wiki URL and build an appropriate opener object for retrieving # pages. If the URL includes HTTP authentication information such as # http://user:[email protected]/mywiki, the opener is built with # basic authentication enabled. For details, see: #
from StringIO import StringIO
from akara import module_config
import pprint
import sys
import re
import os
import os.path
import urllib

from akara import logger
from akara import response
from akara.services import simple_service
from amara.thirdparty import json
from dplaingestion.selector import getprop, setprop, exists

# The main directory where the images will be saved.
THUMBS_ROOT_PATH = module_config().get('thumbs_root_path')

# The dictionary containing mapping of MIME type to file extension.
# What's more, only those MIME types will be saved.
MIME_TYPES = module_config().get('mime_to_type')


def update_document(document, filepath, mime, status):
    """
    Updates the document with a filepath of downloaded thumbnail..

    Arguments:
        document object - document for updating (decoded by json module)
        filepath string - filepath to insert

    Returns:
# This is a very simple implementation of conditional GET with # the Last-Modified header. It makes media files a bit speedier # because the files are only read off disk for the first request # (assuming the browser/client supports conditional GET). mtime = formatdate(os.stat(filename).st_mtime, usegmt=True) headers = [('Last-Modified', mtime)] if environ.get('HTTP_IF_MODIFIED_SINCE', None) == mtime: status = '304 Not Modified' output = () else: status = '200 OK' mime_type = mimetypes.guess_type(filename)[0] if mime_type: headers.append(('Content-Type', mime_type)) output = [fp.read()] fp.close() start_response(status, headers) return output import akara if not akara.module_config(): akara.logger.warn("No configuration section found for %r" % (__name__,)) paths = akara.module_config().get("paths", {}) for path, root in paths.items(): handler = MediaHandler(root) registry.register_service(SERVICE_ID, path, handler)
from akara import module_config
import pprint
import sys
import re
import os
import os.path
import urllib

from akara import logger
from akara import response
from akara.services import simple_service
from amara.thirdparty import json
from dplaingestion.selector import getprop, setprop, exists

# The main directory where the images will be saved.
THUMBS_ROOT_PATH = module_config().get('thumbs_root_path')

# The dictionary containing mapping of MIME type to file extension.
# What's more, only those MIME types will be saved.
MIME_TYPES = module_config().get('mime_to_type')


def update_document(document, filepath, mime, status):
    """
    Updates the document with a filepath of downloaded thumbnail..

    Arguments:
        document object - document for updating (decoded by json module)
        filepath string - filepath to insert
from amara.thirdparty import json, httplib2
from akara.services import simple_service
from akara import logger
from akara import response
from akara.caching import cache, make_named_cache
from akara import module_config
from zen.services import service_proxy
from zen import augmentation
from zen.akamod import geolookup_service
# from zen.geo import local_geonames, US_STATE_FIRST

# Location of the geonames database, per this module's configuration.
GEONAMES_PLUS_DBFILE = module_config().get("geonames_dbfile")
# Batch size for chunked processing (usage not visible in this chunk).
CHUNKCOUNT = 10
# Characters disallowed in Exhibit keys.
UNSUPPORTED_IN_EXHIBITKEY = re.compile("\W")
# Key names Exhibit reserves for itself.
EXHIBIT_RESERVED = ["label", "id", "type"]

# GEOLOOKUP_CACHE = cache('http://purl.org/com/zepheira/services/geolookup.json', expires=24*60*60)


def post(body, sink):
    """POST `body` to `sink` as JSON; return httplib2's (response, content)."""
    json_headers = {"Content-type": "application/json"}
    client = httplib2.Http()
    return client.request(sink, "POST", body=body, headers=json_headers)
from amara import bindery
from amara.thirdparty import json
from akara.services import simple_service
from akara.util import status_response
from akara import response
from akara import logger
from akara import module_config
#from zen.latlong import latlong
from zen.geo import local_geonames, geonames_service

# Well-known identifiers for the two supported geocoder backends.
LOCAL_GEONAMES = 'http://purl.org/com/zepheira/services/geocoders/local-geonames'
GEONAMES_SERVICE = 'http://purl.org/com/zepheira/services/geocoders/geonames-service'

# Backend selection and its settings, all from module config; the local
# geonames database is the default backend.
GEOCODER = module_config().get('geocoder', LOCAL_GEONAMES)
GEONAMES_PLUS_DBFILE = module_config().get('geonames_dbfile')
GEONAMES_SERVICE_USER = module_config().get('geonames_service_user')

# Specifies the default max-age of across-the-board lookups
CACHE_MAX_AGE = str(module_config().get('cache_max_age'))

# Bind the geocoding callable for the configured backend; remains None if
# the configured value matches neither known backend identifier.
geocoder_func = None
if GEOCODER == LOCAL_GEONAMES:
    geocoder_func = local_geonames(GEONAMES_PLUS_DBFILE, logger=logger)
elif GEOCODER == GEONAMES_SERVICE:
    geocoder_func = geonames_service(user=GEONAMES_SERVICE_USER, logger=logger)
from dplaingestion.oai import oaiservice
from couchdb.client import Server
from akara.services import simple_service
from akara import request, response
from akara import module_config, logger
from akara.util import copy_headers_to_dict
from amara.thirdparty import json, httplib2
from amara.lib.iri import join
from urllib import quote
import datetime
import uuid
import base64

# Configuration for accessing the database.
COUCH_DATABASE = module_config().get('couch_database')
COUCH_DATABASE_USERNAME = module_config().get('couch_database_username')
COUCH_DATABASE_PASSWORD = module_config().get('couch_database_password')

# Use b64encode rather than encodestring: encodestring appends a trailing
# newline (and inserts one every 76 characters of output), which corrupts
# the HTTP Authorization header value.
COUCH_AUTH_HEADER = {
    'Authorization': 'Basic ' + base64.b64encode(
        COUCH_DATABASE_USERNAME + ":" + COUCH_DATABASE_PASSWORD)
}

CT_JSON = {'Content-Type': 'application/json'}

# The app name used for accessing the views.
VIEW_APP = "thumbnails"
# The view name for accessing the documents which need getting the thumbnail.
VIEW_NAME = "all_for_downloading"

UPDATE_SERVICE_ID = 'http://purl.org/la.dp/dpla-thumbs-update-doc'
LISTRECORDS_SERVICE_ID = 'http://purl.org/la.dp/dpla-thumbs-list-for-downloading'