示例#1
0
# -*- coding: utf-8 -*-
#
# Writes the payload of a packet as a string to a file.
# Based on outputs.fileoutput.FileOutput.
#
# Author: Frank Steggink
#
from stetl.component import Config
from stetl.filter import Filter
from stetl.util import Util
from stetl.packet import FORMAT

import os

log = Util.get_log('packetwriter')


class PacketWriter(Filter):
    """
    Writes the payload of a packet as a string to a file.

    consumes=FORMAT.any, produces=FORMAT.string
    """

    # Start attribute config meta
    @Config(ptype=str, default=None, required=True)
    def file_path(self):
        """
        File path to write content to.

        Required: True
示例#2
0
#!/usr/bin/env python
#
# Extracts arrays of etree GML features from an GML etree document.
#
# Author: Just van den Broecke
#
from stetl.util import Util
from stetl.filter import Filter
from stetl.packet import FORMAT

log = Util.get_log('gmlfeatureextractor')


class GmlFeatureExtractor(Filter):
    """
    Extract arrays of GML feature etree elements from etree docs.

    consumes=FORMAT.etree_doc, produces=FORMAT.etree_feature_array
    """

    # XPath query template for selecting elements by local (namespace-less) tag name;
    # '%s' is the placeholder for the tag name.
    xpath_base = "//*[local-name() = '%s']"

    # Constructor
    def __init__(self, configdict, section='gml_feature_extractor'):
        """
        Initialise the filter and read the tag names of the features to extract.

        :param configdict: configuration, passed on to Filter.__init__
        :param section: config section name, defaults to 'gml_feature_extractor'
        """
        Filter.__init__(self, configdict, section, consumes=FORMAT.etree_doc, produces=FORMAT.etree_feature_array)

        log.info("cfg = %s" % self.cfg.to_string())

        # Build the XPath expression from the configured tag names:
        # 'feature_tags' is a comma-separated string, split here into a list.
        # NOTE(review): .split() raises if cfg.get() yields a non-string (e.g. when
        # 'feature_tags' is not configured) - confirm the option is always present.
        self.feature_tags = self.cfg.get('feature_tags').split(',')
示例#3
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Executes the given command and returns the captured output.
#
# Author: Frank Steggink
#
import subprocess
import os
from stetl.component import Config
from stetl.filter import Filter
from stetl.util import Util
from stetl.packet import FORMAT

log = Util.get_log('execfilter')


class ExecFilter(Filter):
    """
    Executes any command (abstract base class).
    """

    @Config(ptype=str, default='', required=False)
    def env_args(self):
        """
        Provides a list of environment variables which will be used when executing the given command.

        Optional string setting, defaults to the empty string. Going by the example
        below, entries are space-separated name=value pairs (presumably '~' stands
        in for a space inside a value - confirm against the code that parses it).

        Example: env_args = pgpassword=postgres othersetting=value~with~spaces
        """
        pass
示例#4
0
# -*- coding: utf-8 -*-
#
# Output classes for ETL, databases.
#
# Author: Just van den Broecke
#
from stetl.output import Output
from stetl.util import Util
from stetl.packet import FORMAT
from stetl.component import Config
from stetl.postgis import PostGIS

log = Util.get_log('dboutput')


class DbOutput(Output):
    """
    Abstract base class for Outputs that write to a database.

    The packet format to consume is supplied by the concrete subclass.
    """

    def __init__(self, configdict, section, consumes):
        # Nothing database-specific to set up here: defer to the generic Output.
        Output.__init__(self, configdict, section, consumes=consumes)

    def write(self, packet):
        """
        Default write: does nothing and hands the packet back unchanged.
        """
        return packet


class PostgresDbOutput(DbOutput):
    """
    Output to PostgreSQL database.
    Input is an SQL string.
示例#5
0
#!/usr/bin/env python
#
# Splits stream of GML lines into etree docs.
#
# Author: Just van den Broecke
#
import codecs
from deprecated.sphinx import deprecated
from stetl.util import Util, etree, StringIO
from stetl.filter import Filter
from stetl.packet import FORMAT

log = Util.get_log('gmlsplitter')


@deprecated(
    version='1.0.4',
    reason=
    'Use the more robust XmlElementStreamerFileInput + XmlAssembler instead!!!'
)
class GmlSplitter(Filter):
    """
    Split a stream of text XML lines into documents
    TODO phase out

    consumes=FORMAT.xml_line_stream, produces=FORMAT.etree_doc
    """
    def __init__(self, configdict, section='gml_splitter'):
        Filter.__init__(self,
                        configdict,
                        section,
示例#6
0
#!/usr/bin/env python
#
# Converts Stetl Packet FORMATs. This can be used to connect
# Stetl components with different output/input formats.
#
# Author:Just van den Broecke

import json
from stetl.component import Config
from stetl.util import Util, etree
from stetl.filter import Filter
from stetl.packet import FORMAT

log = Util.get_log("formatconverter")


class FormatConverter(Filter):
    """
    Converts (almost) any packet format (if converter available).

    consumes=FORMAT.any, produces=FORMAT.any but actual formats
    are changed at initialization based on the input to output format to
    be converted via the input_format and output_format config parameters.
    """

    # Start attribute config meta
    # Applying Decorator pattern with the Config class to provide
    # read-only config values from the configured properties.

    @Config(ptype=dict, default=None, required=False)
    def converter_args(self):
示例#7
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Splits stream of XML elements into etree docs.
#
# Author: Just van den Broecke
#
from stetl.util import Util, etree
from stetl.filter import Filter
from stetl.packet import FORMAT

log = Util.get_log('xmlassembler')


class XmlAssembler(Filter):
    """
    Split a stream of etree DOM XML elements (usually Features) into etree DOM docs.
    Consumes and buffers elements until max_elements reached, will then produce an etree doc.

    consumes=FORMAT.etree_element_stream, produces=FORMAT.etree_doc
    """
    # XPath query template for selecting elements by local (namespace-less) tag name;
    # '%s' is the placeholder for the tag name.
    xpath_base = "//*[local-name() = '%s']"

    # Constructor
    def __init__(self, configdict, section):
        """
        Register the filter as consuming an etree element stream and producing etree docs.

        :param configdict: configuration, passed on to Filter.__init__
        :param section: config section name, passed on to Filter.__init__
        """
        Filter.__init__(self,
                        configdict,
                        section,
                        consumes=FORMAT.etree_element_stream,
                        produces=FORMAT.etree_doc)
#
# Filter that prepares a GFS file which can be used to load with ogr2ogr.
#
# Author: Frank Steggink

import os
import re
import subprocess

from stetl.component import Config
from stetl.filter import Filter
from stetl.packet import FORMAT
from stetl.util import Util, etree
from string import Template

log = Util.get_log("gfspreparationfilter")


class GfsPreparationFilter(Filter):
    """
    This filter prepares a GFS file, so any GML data will be loaded optimally with ogr2ogr. This is done by limiting the
    input GFS to only the feature types which actually occur in the data, and by adding feature count elements.
    """

    XSLT_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
      <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
        <xsl:output method="xml" version="1.0" encoding="utf-8" indent="yes" omit-xml-declaration="yes"/>
        <xsl:strip-space elements="*"/>
        <xsl:template match="/ | @* | node()">
            <xsl:copy>
                <xsl:apply-templates select="@* | node()" />
示例#9
0
# -*- coding: utf-8 -*-
#
# Output classes for ETL, executing commands.
#
# Author: Frank Steggink
#
import subprocess
import os
import shutil
from stetl.component import Config
from stetl.output import Output
from stetl.util import Util
from stetl.packet import FORMAT

log = Util.get_log('execoutput')


class ExecOutput(Output):
    """
    Executes any command (abstract base class).
    """
    def __init__(self, configdict, section, consumes):
        """
        Set up the output by delegating everything to the generic Output constructor.

        :param consumes: packet format the concrete subclass consumes
        """
        Output.__init__(self, configdict, section, consumes=consumes)

    def write(self, packet):
        """
        Default write: does nothing and returns the packet unchanged.
        """
        return packet

    def execute_cmd(self, cmd):
        use_shell = True
        if os.name == 'nt':
            use_shell = False
示例#10
0
# -*- coding: utf-8 -*-
#
# Example of user-defined component.
#
# Author:Just van den Broecke

from stetl.util import Util
from stetl.filter import Filter
from stetl.packet import FORMAT

log = Util.get_log("myfilter")


class MyFilter(Filter):
    # Constructor
    def __init__(self, configdict, section):
        """
        Configure the filter to both consume and produce etree documents.
        """
        doc_format = FORMAT.etree_doc
        Filter.__init__(self,
                        configdict,
                        section,
                        consumes=doc_format,
                        produces=doc_format)

    def invoke(self, packet):
        """
        Process the packet via do_something() when it carries data;
        a packet without data is returned untouched.
        """
        if packet.data is not None:
            return self.do_something(packet)
        return packet

    def do_something(self, packet):
        """
        Log that the filter was called, show the packet's data and pass the packet on.

        :return: the same packet that was passed in
        """
        log.info("CALLING MyFilter OK!!!!")
        self.show_data(packet.data)
        return packet

    def show_data(self, data):
示例#11
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Base classes for HTTP output like WFS-T and SOS-T or any other HTTP writing service.
#
# Author: Just van den Broecke
#
from stetl.output import Output
from stetl.util import Util
from stetl.packet import FORMAT
from stetl.component import Config
import httplib
import base64

log = Util.get_log('httpoutput')

class HttpOutput(Output):
    """
    Output via HTTP protocol, usually via POST.

    consumes=FORMAT.any
    """

    @Config(ptype=str, default=None, required=True)
    def host(self):
        """
        The hostname/IP addr for target request.

        Required: True

        Default: None
示例#12
0
文件: ogrinput.py 项目: reinout/stetl
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Input classes for ETL via GDAL OGR.
#
# Author: Just van den Broecke
#
import subprocess
from stetl.util import Util
from stetl.input import Input
from stetl.packet import FORMAT

log = Util.get_log("ogrinput")


class OgrPostgisInput(Input):
    """
     Input from PostGIS via ogr2ogr command.
     TODO: look to use Fiona or direct OGR via Python.

     produces=FORMAT.xml_line_stream
    """

    # TODO make this template configurable so we can have generic ogr2ogr input....
    # %-style template for the OGR PostgreSQL connection string; placeholders are,
    # in order: host, dbname, active_schema, user, password, port.
    pg_conn_tmpl = "PG:host=%s dbname=%s active_schema=%s user=%s password=%s port=%s"
    # %-style template for the ogr2ogr command line with '|' between the arguments
    # (presumably split on '|' into an argument list by the code using it - confirm).
    cmd_tmpl = "ogr2ogr|-t_srs|%s|-s_srs|%s|-f|GML|%s|-dsco|FORMAT=%s|-lco|DIM=%s|%s|-SQL|%s|-nln|%s|%s"

    # Constructor
    def __init__(self, configdict, section):
        """
        Register the input as producing an XML line stream.

        :param configdict: configuration, passed on to Input.__init__
        :param section: config section name, passed on to Input.__init__
        """
        Input.__init__(self, configdict, section, produces=FORMAT.xml_line_stream)
示例#13
0
#!/usr/bin/env python
#
# Output classes for ETL.
#
# Author: Just van den Broecke
#
import subprocess
import os
import shutil

from stetl.component import Config
from stetl.output import Output
from stetl.util import Util, gdal, ogr, osr
from stetl.packet import FORMAT

log = Util.get_log('ogroutput')


class OgrOutput(Output):
    """
    Direct GDAL OGR output via Python OGR wrapper. Via the Python API http://gdal.org/python
    OGR Features are written.

    This output can write almost any geospatial, OGR-defined, dataformat.

    consumes=FORMAT.ogr_feature or FORMAT.ogr_feature_array
    """

    # Start attribute config meta
    # Applying Decorator pattern with the Config class to provide
    # read-only config values from the configured properties.
示例#14
0
# -*- coding: utf-8 -*-
#
# MeasurementsDbInput: Reads RIVM raw AQ/LML file data from measurements table and converts to recordlist
#
# Author:Just van den Broecke

from stetl.util import Util, etree
from stetl.inputs.dbinput import PostgresDbInput
from stetl.packet import FORMAT
from stetl.postgis import PostGIS

from datetime import datetime

log = Util.get_log("MeasurementsDbInput")

class MeasurementsDbInput(PostgresDbInput):
    """
    Reads RIVM raw AQ/LML file data from measurements table and converts to recordlist
    """
    def __init__(self, configdict, section):
        """
        Read the progress-tracking settings on top of the PostgresDbInput set-up.

        Config options read here: 'progress_query' and 'progress_update'.
        """
        PostgresDbInput.__init__(self, configdict, section)
        self.progress_query = self.cfg.get('progress_query')
        self.progress_update = self.cfg.get('progress_update')
        # No database handle at construction time; presumably assigned later - not visible here.
        self.db = None

    def after_chain_invoke(self, packet):
        """
        Called right after entire Component Chain invoke.
        Used to update last id of processed file record.
        """
        # NOTE(review): self.last_id is not assigned in the __init__ above, so it has
        # to be set elsewhere before this hook runs; '%d' also requires it to be a number.
        log.info('Updating progress table with last_id= %d' % self.last_id)
示例#15
0
#
# Author: Pieter Marsman - 2016

import sys
import traceback
from stetl.component import Config
from stetl.filter import Filter
from stetl.inputs.dbinput import PostgresDbInput
from stetl.packet import FORMAT
from stetl.util import Util

from dateutil import parser

from sensordefs import *

log = Util.get_log("Extractor")


class ExtractFilter(Filter):
    """
    Filter to consume single raw record with sensor (single hour) timeseries values and extract these for each component.
    Input is a single timeseries record for a single hour with all sensorvalues for a single device within that hour.
    """
    @Config(ptype=list, default=[], required=True)
    def sensor_names(self):
        """
        The output sensor names to extract.

        Required: True

        Default: []
示例#16
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Filter: XML validation.
#
# NB: you need to have installed libxml2 2.8.0 or newer!
# Older libxml2 versions like 2.7.8 have a bug which causes failure in GML Schema
# parsing. See https://bugzilla.gnome.org/show_bug.cgi?id=630130
#
# Author:Just van den Broecke
#
from stetl.util import Util, etree
from stetl.filter import Filter
from stetl.packet import FORMAT

log = Util.get_log("xmlvalidator")


class XmlSchemaValidator(Filter):
    """
    Validates an etree doc and prints result to log.

    consumes=FORMAT.etree_doc, produces=FORMAT.etree_doc
    """

    # Constructor
    def __init__(self, configdict, section):
        """
        Read the validator settings from the config section.

        Config options read here: 'enabled' (bool, defaults to True) and
        'xsd' (the schema referred to in the log message below).
        """
        Filter.__init__(self, configdict, section, consumes=FORMAT.etree_doc, produces=FORMAT.etree_doc)
        self.enabled = self.cfg.get_bool('enabled', True)
        self.xsd = self.cfg.get('xsd')
        log.info("Building the Schema once with (GML XSD) dependencies for schema=%s (be patient...)" % self.xsd)
示例#17
0
文件: fileinput.py 项目: gijs/stetl
# -*- coding: utf-8 -*-
#
# Input classes for ETL, Files.
#
# Author: Just van den Broecke
#
from stetl.input import Input
from stetl.util import Util, etree
from stetl.packet import FORMAT

log = Util.get_log('fileinput')

class FileInput(Input):
    """
    Abstract base class for specific FileInputs.
    """

    def __init__(self, configdict, section, produces):
        """
        Read the file-selection settings and build the list of input files.

        Config options read here: 'file_path', 'filename_pattern'
        (defaults to '*.[gxGX][mM][lL]') and 'depth_search' (bool, defaults to False).

        :param produces: packet format produced by the concrete subclass
        """
        Input.__init__(self, configdict, section, produces)

        # path to file or files: can be a dir or files or even multiple, comma separated
        self.file_path = self.cfg.get('file_path')

        # The filename pattern according to Python glob.glob
        self.filename_pattern = self.cfg.get('filename_pattern', '*.[gxGX][mM][lL]')

        # Recurse into directories ?
        self.depth_search = self.cfg.get_bool('depth_search', False)

        # Create the list of files to be used as input
        # NOTE(review): the meaning of the second argument (None) is defined by
        # Util.make_file_list, which is not visible here.
        self.file_list = Util.make_file_list(self.file_path, None, self.filename_pattern, self.depth_search)
示例#18
0
# -*- coding: utf-8 -*-
#
# Input classes for ETL via HTTP.
#
# Author: Just van den Broecke
#
from stetl.input import Input
from stetl.util import Util
from stetl.packet import FORMAT
import urllib
import urllib2


log = Util.get_log('httpinput')


class HttpInput(Input):
    """
     Input via HTTP protocol.

     produces=FORMAT.any
    """

    def __init__(self, configdict, section, produces=FORMAT.any):
        """
        Read the request settings from the config section.

        Config options read here: 'url' and 'parameters'.

        :param produces: packet format produced, defaults to FORMAT.any
        """
        Input.__init__(self, configdict, section, produces)

        # url and optional parameters
        self.url = self.cfg.get('url')
        self.parameters = self.cfg.get('parameters')

        # http://docs.python.org/2/howto/urllib2.html
示例#19
0
#!/usr/bin/env python
#
# Transformation of an etree doc with XSLT.
#
# Author:Just van den Broecke

from stetl.component import Config
from stetl.util import Util, etree
from stetl.filter import Filter
from stetl.packet import FORMAT

log = Util.get_log("xsltfilter")


class XsltFilter(Filter):
    """
    Invokes XSLT processor (via lxml) for given XSLT script on an etree doc.

    consumes=FORMAT.etree_doc, produces=FORMAT.etree_doc
    """
    @Config(ptype=str, required=True)
    def script(self):
        """
        Path to XSLT script file.

        Required: True
        """
        pass

    # Constructor
    def __init__(self, configdict, section):
        Filter.__init__(self,
                        configdict,
示例#20
0
# Expands an archive file into a collection of files.
#
# Author: Just van den Broecke 2021
#
import os.path
from stetl.component import Config
from stetl.filter import Filter
from stetl.util import Util
from stetl.packet import FORMAT

log = Util.get_log('archiveexpander')


class ArchiveExpander(Filter):
    """
    Abstract Base Class.
    Expands an archive file into a collection of files.

    consumes=FORMAT.string, produces=FORMAT.string
    """

    # Start attribute config meta

    @Config(ptype=str, default='temp_dir', required=True)
    def target_dir(self):
        """
        Target directory to write the extracted files to.

        Required: True

        Default: temp_dir
        """
        pass

    @Config(ptype=bool, default=False, required=False)
示例#21
0
# -*- coding: utf-8 -*-
#
# String filtering.
#
# Author:Just van den Broecke

from stetl.component import Config
from stetl.util import Util
from stetl.filter import Filter
from stetl.packet import FORMAT

log = Util.get_log("stringfilter")


class StringFilter(Filter):
    """
    Common base class for Filters that operate on string data.

    Subclasses implement filter_string(); invoke() dispatches to it only
    when the packet actually carries data.
    """

    def __init__(self, configdict, section, consumes, produces):
        # Nothing string-specific to configure: defer to the generic Filter.
        Filter.__init__(self, configdict, section, consumes=consumes, produces=produces)

    def invoke(self, packet):
        """
        Run filter_string() on packets that have data; return data-less packets as-is.
        """
        if packet.data is not None:
            return self.filter_string(packet)
        return packet

    def filter_string(self, packet):
        """
        Hook for subclasses. The base implementation does nothing and returns None.
        """
        return None
示例#22
0
文件: dbinput.py 项目: dracic/stetl
# -*- coding: utf-8 -*-
#
# Input classes for ETL, databases.
#
# Author: Just van den Broecke
#
from stetl.component import Config
from stetl.input import Input
from stetl.util import Util
from stetl.packet import FORMAT
from stetl.postgis import PostGIS

log = Util.get_log('dbinput')


class DbInput(Input):
    """
    Abstract base class for Inputs that read from a database.

    The packet format to produce is supplied by the concrete subclass.
    """

    def __init__(self, configdict, section, produces):
        # Nothing database-specific to set up here: defer to the generic Input.
        Input.__init__(self, configdict, section, produces)

    def read(self, packet):
        """
        Default read: does nothing and hands the packet back unchanged.
        """
        return packet


class SqlDbInput(DbInput):
    """
    Input using a query from any SQL-based RDBMS (abstract base class).
    """
示例#23
0
# Output classes for ETL with SensorThings API.
#
# Author: Just van den Broecke
#

from os import path
import requests
import json
import base64

from stetl.util import Util
from stetl.packet import FORMAT
from stetl.component import Config
from stetl.outputs.httpoutput import HttpOutput

log = Util.get_log('staoutput')


class STAOutput(HttpOutput):
    """
    Output via SensorThings API (STA) over plain HTTP using the HttpOutput base class.
    See examples: http://www.sensorup.com/docs/?python

    consumes=FORMAT.record_array

    """
    @Config(ptype=str,
            default='application/json;charset=UTF-8',
            required=False)
    def content_type(self):
        """
示例#24
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# POST data via WFS Transactional protocol (WFS-T).
#
# Author: Just van den Broecke
#
from stetl.component import Config
from stetl.output import Output
from stetl.util import Util
from stetl.packet import FORMAT
import httplib

log = Util.get_log('wfsoutput')


class WFSTOutput(Output):
    """
    Insert features via WFS-T (WFS Transaction) OGC protocol from an etree doc.

    consumes=FORMAT.etree_doc
    """

    # Start attribute config meta
    @Config(ptype=str, required=True, default=None)
    def wfs_host(self):
        """
        Hostname-part of URL e.g. geodata.ngr.nl.

        Required: True

        Default: None
        """
        pass
示例#25
0
# -*- coding: utf-8 -*-
#
# MeasurementsDbInput: Reads SmartEm raw AQ/LML file data from measurements table and converts to recordlist
#
# Author:Just van den Broecke

from stetl.util import Util, etree
from stetl.inputs.dbinput import PostgresDbInput
from stetl.packet import FORMAT
from stetl.postgis import PostGIS

from datetime import datetime

log = Util.get_log("MeasurementsDbInput")


class MeasurementsDbInput(PostgresDbInput):
    """
    Reads SmartEm raw AQ/LML file data from measurements table and converts to recordlist
    """
    def __init__(self, configdict, section):
        """
        Read the progress-tracking settings on top of the PostgresDbInput set-up.

        Config options read here: 'progress_query' and 'progress_update'.
        """
        PostgresDbInput.__init__(self, configdict, section)
        self.progress_query = self.cfg.get('progress_query')
        self.progress_update = self.cfg.get('progress_update')
        # No database handle at construction time; presumably assigned later - not visible here.
        self.db = None

    def after_chain_invoke(self, packet):
        """
        Called right after entire Component Chain invoke.
        Used to update last id of processed file record.
        """
示例#26
0
# -*- coding: utf-8 -*-
#
# Output classes for ETL.
#
# Author: Just van den Broecke
#
from stetl.output import Output
from stetl.util import Util
from stetl.packet import FORMAT

log = Util.get_log('standardoutput')


#
class StandardOutput(Output):
    """
    Output that prints whatever it receives to standard output.

    consumes=FORMAT.any
    """

    def __init__(self, configdict, section):
        # Accept any packet format.
        Output.__init__(self, configdict, section, FORMAT.any)

    def write(self, packet):
        """
        Print the packet's string representation unless the packet has no data.

        :return: the same packet that was passed in
        """
        if packet.data is not None:
            # Default: print to stdout
            print(packet.to_string())
        return packet
示例#27
0
# -*- coding: utf-8 -*-
#
# Writes the payload of a packet as a string to a file.
# Based on outputs.fileoutput.FileOutput.
#
# Author: Frank Steggink
#
from stetl.component import Config
from stetl.filter import Filter
from stetl.util import Util
from stetl.packet import FORMAT

import os

log = Util.get_log('packetwriter')


class PacketWriter(Filter):
    """
    Writes the payload of a packet as a string to a file.

    consumes=FORMAT.any, produces=FORMAT.string
    """

    # Start attribute config meta
    @Config(ptype=str, default=None, required=True)
    def file_path(self):
        """
        File path to write content to.

        Required: True

        Default: None
        """
        pass
示例#28
0
from stetl.component import Config
from stetl.inputs.dbinput import PostgresDbInput
from stetl.util import Util
from smartem.sosinput import SosInput

log = Util.get_log("RIVMSosInput")


class RIVMSosInput(SosInput, PostgresDbInput):
    """
    Specialized SOS Input for RIVM SOS, adds progress tracking.
    """
    @Config(ptype=str, required=True)
    def progress_query(self):
        """
        Query to fetch progress for feature.

        The result rows of this query are iterated in init().

        Required: True
        """

    def __init__(self, configdict, section):
        """
        Initialise both parent Inputs and start with an empty progress mapping.
        """
        # Both bases are initialised explicitly, SosInput first.
        SosInput.__init__(self, configdict, section)
        PostgresDbInput.__init__(self, configdict, section)
        self.progress = {}

    def init(self):
        SosInput.init(self)
        PostgresDbInput.init(self)

        progress_list = self.do_query(self.progress_query)
        for progress_row in progress_list:
示例#29
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Output classes for ETL.
#
# Author: Just van den Broecke
#
from stetl.outputs.httpoutput import HttpOutput
from stetl.util import Util
from stetl.packet import FORMAT

log = Util.get_log('sosoutput')


class SOSTOutput(HttpOutput):
    """
    Output via SOS-T protocol over plain HTTP.

    consumes=FORMAT.record_array
    """
    def __init__(self, configdict, section):
        """
        Register the output as consuming record arrays and read the SOS-T settings.

        Config options read here, with their defaults: 'content_type'
        ('application/json;charset=UTF-8'), 'sos_request' ('insert-observation')
        and 'template_file_ext' ('json').
        """
        HttpOutput.__init__(self,
                            configdict,
                            section,
                            consumes=FORMAT.record_array)
        self.content_type = self.cfg.get('content_type',
                                         'application/json;charset=UTF-8')
        self.sos_request = self.cfg.get('sos_request', 'insert-observation')

        # Template file, to be used as POST body with substituted values
        self.template_file_ext = self.cfg.get('template_file_ext', 'json')
示例#30
0
# -*- coding: utf-8 -*-
#
# Input classes for ETL, databases.
#
# Author: Just van den Broecke
#
from stetl.component import Config
from stetl.input import Input
from stetl.util import Util
from stetl.packet import FORMAT
from stetl.postgis import PostGIS

log = Util.get_log('dbinput')


class DbInput(Input):
    """
    Abstract base class for Inputs that read from a database.

    The packet format to produce is supplied by the concrete subclass.
    """

    def __init__(self, configdict, section, produces):
        # Nothing database-specific to set up here: defer to the generic Input.
        Input.__init__(self, configdict, section, produces)

    def read(self, packet):
        """
        Default read: does nothing and hands the packet back unchanged.
        """
        return packet


class SqlDbInput(DbInput):
    """
    Input using a query from any SQL-based RDBMS (abstract base class).
    """
示例#31
0
# Transformation of any input using Python Templating as
# meant in: https://wiki.python.org/moin/Templating.
# A TemplatingFilter typically is configured with a template file.
# The input is typically the Template context, the variables to be substituted.
# The output is a string passed to the next Filter or Output.
#
# Author:Just van den Broecke

from stetl.util import Util, ogr, osr
from stetl.component import Config
from stetl.filter import Filter
from stetl.packet import FORMAT
from string import Template
import os

log = Util.get_log("templatingfilter")


class TemplatingFilter(Filter):
    """
    Abstract base class for specific template-based filters.
    See https://wiki.python.org/moin/Templating
    Subclasses implement a specific template language like Python string.Template, Mako, Genshi, Jinja2,

    consumes=FORMAT.any, produces=FORMAT.string
    """

    # Start attribute config meta
    # Applying Decorator pattern with the Config class to provide
    # read-only config values from the configured properties.
示例#32
0
#!/usr/bin/env python
#
# Transformation of an etree doc with XSLT.
#
# Author:Just van den Broecke

from stetl.component import Config
from stetl.util import Util, etree
from stetl.filter import Filter
from stetl.packet import FORMAT

log = Util.get_log("xsltfilter")


class XsltFilter(Filter):
    """
    Invokes XSLT processor (via lxml) for given XSLT script on an etree doc.

    consumes=FORMAT.etree_doc, produces=FORMAT.etree_doc
    """

    @Config(ptype=str, required=True)
    def script(self):
        """
        Path to XSLT script file.

        Required: True
        """
        pass

    # Constructor
    def __init__(self, configdict, section):
        """
        Register the filter as both consuming and producing etree docs.

        :param configdict: configuration, passed on to Filter.__init__
        :param section: config section name, passed on to Filter.__init__
        """
        Filter.__init__(self, configdict, section, consumes=FORMAT.etree_doc, produces=FORMAT.etree_doc)
示例#33
0
# -*- coding: utf-8 -*-
#
# Reads an XML file and returns XML elements.
# Based on inputs.fileinput.XmlElementStreamFileInput.
#
# Author: Frank Steggink
#
from copy import deepcopy

from stetl.component import Config
from stetl.filter import Filter
from stetl.util import Util, etree
from stetl.packet import FORMAT

log = Util.get_log('xmlelementreader')


class XmlElementReader(Filter):
    """
    Extracts XML elements from a file, outputs each feature element in Packet.
    Parsing is streaming (no internal DOM buildup) so any file size can be handled.
    Use this class for your big GML files!

    consumes=FORMAT.string, produces=FORMAT.etree_element
    """

    # Start attribute config meta
    @Config(ptype=list, default=None, required=True)
    def element_tags(self):
        """
        Comma-separated string of XML (feature) element tag names of the elements that should be extracted
示例#34
0
# Filter that does nothing, just passes any data through.
#
# Author:Just van den Broecke

from stetl.util import Util
from stetl.filter import Filter
from stetl.packet import FORMAT

log = Util.get_log("nullfilter")


class NullFilter(Filter):
    """
    Filter that passes every packet through unchanged. Mainly used in Test Cases.
    """

    def __init__(self, configdict, section, consumes=FORMAT.any, produces=FORMAT.any):
        # Both formats default to FORMAT.any so the filter fits between any two components.
        Filter.__init__(self,
                        configdict,
                        section,
                        consumes=consumes,
                        produces=produces)

    def invoke(self, packet):
        """
        Return the incoming packet as-is.
        """
        return packet
示例#35
0
# -*- coding: utf-8 -*-
#
# Reads an XML file and returns XML elements.
# Based on inputs.fileinput.XmlElementStreamFileInput.
#
# Author: Frank Steggink
#
from copy import deepcopy

from stetl.component import Config
from stetl.filter import Filter
from stetl.util import Util, etree
from stetl.packet import FORMAT

log = Util.get_log('xmlelementreader')


class XmlElementReader(Filter):
    """
    Extracts XML elements from a file, outputs each feature element in Packet.
    Parsing is streaming (no internal DOM buildup) so any file size can be handled.
    Use this class for your big GML files!

    consumes=FORMAT.string, produces=FORMAT.etree_element
    """

    # Start attribute config meta
    @Config(ptype=list, default=None, required=True)
    def element_tags(self):
        """
        Comma-separated string of XML (feature) element tag names of the elements that should be extracted
示例#36
0
#!/usr/bin/env python
#
# Input classes for ETL.
#
# Author: Just van den Broecke
#
import codecs
import re

from stetl.component import Config
from stetl.postgis import PostGIS
from stetl.input import Input
from stetl.util import Util, etree, StringIO
from stetl.packet import FORMAT

log = Util.get_log('deegreeinput')


class DeegreeBlobstoreInput(Input):
    """
    Read features from deegree Blobstore DB into an etree doc.

    produces=FORMAT.etree_doc
    """

    # Start attribute config meta

    @Config(ptype=int, required=False, default=10000)
    def max_features_per_doc(self):
        """
        Max features to read from input feature GML stream per internal document.
示例#37
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Output Components for deegree server storage (www.deegree.org).
#
# Author: Just van den Broecke
#
# NB deegree also supports WFS-T!
#
from stetl.postgis import PostGIS
from stetl.output import Output
from stetl.util import Util, etree
from stetl.packet import FORMAT
import os

log = Util.get_log('deegreeoutput')

class DeegreeBlobstoreOutput(Output):
    """
    Insert features into deegree Blobstore from an etree doc.

    consumes=FORMAT.etree_doc
    """
    def __init__(self, configdict, section):
        """
        Register the output as consuming etree docs and read its settings.

        Config options read here: 'overwrite' (bool), 'srid' (int, defaults to -1)
        and 'feature_member_tag'.
        """
        Output.__init__(self, configdict, section, FORMAT.etree_doc)

        settings = self.cfg
        self.overwrite = settings.get_bool('overwrite')
        self.srid = settings.get_int('srid', -1)
        self.feature_member_tag = settings.get('feature_member_tag')

        # Starts out empty.
        self.feature_type_ids = dict()

    def init(self):
示例#38
0
# Transformation of any input using Python Templating as
# meant in: https://wiki.python.org/moin/Templating.
# A TemplatingFilter typically is configured with a template file.
# The input is typically the Template context, the variables to be substituted.
# The output is a string passed to the next Filter or Output.
#
# Author:Just van den Broecke
import os

from stetl.util import Util, ogr, osr
from stetl.component import Config
from stetl.filter import Filter
from stetl.packet import FORMAT
from string import Template

log = Util.get_log("templatingfilter")


class TemplatingFilter(Filter):
    """
    Abstract base class for specific template-based filters.
    See https://wiki.python.org/moin/Templating
    Subclasses implement a specific template language like Python string.Template, Mako, Genshi, Jinja2,

    consumes=FORMAT.any, produces=FORMAT.string
    """

    # Start attribute config meta
    # Applying Decorator pattern with the Config class to provide
    # read-only config values from the configured properties.
示例#39
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Converts Stetl Packet FORMATs. This can be used to connect
# Stetl components with different output/input formats.
#
# Author:Just van den Broecke

from stetl.component import Config
from stetl.util import Util, etree
from stetl.filter import Filter
from stetl.packet import FORMAT
import json

log = Util.get_log("formatconverter")


class FormatConverter(Filter):
    """
    Converts (almost) any packet format (if converter available).

    consumes=FORMAT.any, produces=FORMAT.any but actual formats
    are changed at initialization based on the input to output format to
    be converted via the input_format and output_format config parameters.
    """

    # Start attribute config meta
    # Applying Decorator pattern with the Config class to provide
    # read-only config values from the configured properties.

    @Config(ptype=dict, default=None, required=False)
示例#40
0
#
# Filter to consume a raw record of Smart Emission data (one hour for one device) , refining these, producing records.
#

# Author: Just van den Broecke - 2015
import sys, traceback

from stetl.filter import Filter
from stetl.util import Util
from stetl.packet import FORMAT
from stetl.component import Config

import pytz
from sensordefs import *

log = Util.get_log("RefineFilter")


class RefineFilter(Filter):
    """
    Filter to consume single raw record with sensor (single hour) timeseries values and produce refined record for each component.
    Refinement entails: calibration (e.g. Ohm to ug/m3) and aggregation (hour-values).
    Input is a single timeseries record for a single hour with all sensorvalues for a single device within that hour.
    """
    @Config(ptype=list, default=[], required=True)
    def sensor_names(self):
        """
        The output sensor names to refine.

        Required: True
示例#41
0
#!/usr/bin/env python
#
# Extracts data from a string using a regular expression and generates a record.
#
# Author: Frank Steggink
import re

from stetl.component import Config
from stetl.filter import Filter
from stetl.packet import FORMAT
from stetl.util import Util

log = Util.get_log("regexfilter")


class RegexFilter(Filter):
    """
    Filter that applies a regular expression to a string and returns the
    named groups of the match as a record.

    consumes=FORMAT.string, produces=FORMAT.record
    """

    # Config attributes: each is declared via the Config decorator, which
    # exposes the configured property as a read-only value.

    @Config(required=True, default=None, ptype=str)
    def pattern_string(self):
        """
        The regular expression to apply. It should define named groups.
        """
示例#42
0
# -*- coding: utf-8 -*-
#
# Output classes for ETL.
#
# Author: Just van den Broecke
#
from stetl.output import Output
from stetl.util import Util
from stetl.packet import FORMAT

log = Util.get_log('standardoutput')


#
class StandardOutput(Output):
    """
    Output component that echoes any incoming packet to standard output.

    consumes=FORMAT.any
    """

    def __init__(self, configdict, section):
        Output.__init__(self, configdict, section, consumes=FORMAT.any)

    def write(self, packet):
        """
        Print the packet's string form to stdout and return the packet.

        A packet whose data is None is returned untouched; nothing is printed.
        """
        if packet.data is not None:
            # Default behaviour: dump the string representation on stdout
            print(packet.to_string())

        return packet
示例#43
0
#
# Filter that deals with subfeatures in BGT GML files.
#
# Author: Frank Steggink

import os

from copy import deepcopy
# We need specifically lxml, because of the incremental XML generation
from lxml import etree
from stetl.component import Config
from stetl.filter import Filter
from stetl.packet import FORMAT
from stetl.util import Util

log = Util.get_log("subfeaturehandler")


class SubFeatureHandler(Filter):
    """
    This filter checks whether the data file contains the given parent features. If this is the case, the parent feature
    and subfeatures are split into different features.
    """

    # Start attribute config meta
    # Applying Decorator pattern with the Config class to provide
    # read-only config values from the configured properties.

    @Config(ptype=str, default=None, required=True)
    def temp_file(self):
        """
示例#44
0
# Extracts a file from a ZIP file, and saves it as the given file name.
#
# Author: Frank Steggink
#
from stetl.component import Config
from stetl.filter import Filter
from stetl.util import Util
from stetl.packet import FORMAT

log = Util.get_log('zipfileextractor')

BUFFER_SIZE = 1024 * 1024 * 1024


class ZipFileExtractor(Filter):
    """
    Extracts a file from a ZIP file, and saves it as the given file name.

    consumes=FORMAT.record, produces=FORMAT.string
    """

    # Start attribute config meta
    @Config(ptype=str, default=None, required=True)
    def file_path(self):
        """
        File name to write the extracted file to.
        """
        pass

    @Config(ptype=bool, default=True, required=False)
    def delete_file(self):
示例#45
0
# -*- coding: utf-8 -*-
#
# Filter that does nothing, just passes any data through.
#
# Author:Just van den Broecke

from stetl.util import Util
from stetl.filter import Filter
from stetl.packet import FORMAT

log = Util.get_log("nullfilter")


class NullFilter(Filter):
    """
    No-op Filter: hands every packet on unchanged. Mainly used in Test Cases.
    """

    def __init__(self, configdict, section, consumes=FORMAT.any, produces=FORMAT.any):
        # Nothing of its own to set up: just forward everything to Filter
        Filter.__init__(self, configdict, section, consumes=consumes, produces=produces)

    def invoke(self, packet):
        """Return the incoming packet as-is."""
        return packet
示例#46
0
# -*- coding: utf-8 -*-
#
# RawSensorInput: harvest raw timeseries from CityGIS Sensor REST API.
# Use PostGIS DB to track progress of harvesting.
#
# Author:Just van den Broecke

from stetl.util import Util
from stetl.inputs.httpinput import HttpInput
from stetl.packet import FORMAT
from stetl.postgis import PostGIS

log = Util.get_log("RawSensorInput")


class RawSensorInput(HttpInput):
    """
    Raw Sensor REST API (CityGIS) version for HttpInput: adds check for each file if it is already in DB.
    """
    def __init__(self, configdict, section, produces=FORMAT.record):
        # Common setup is delegated to HttpInput; afterwards the 'query' config value is read
        HttpInput.__init__(self, configdict, section, produces)
        self.query = self.cfg.get('query')
        # DB handle, stays None until init() has connected
        self.db = None

    def init(self):
        # Connect only once to DB
        log.info('Init: connect to DB')
        # The PostGIS object is built from this component's config dict
        self.db = PostGIS(self.cfg.get_dict())
        self.db.connect()

        # Let superclass read file list from Apache URL
示例#47
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Extracts data from a string using a regular expression and generates a record.
#
# Author: Frank Steggink

from stetl.component import Config
from stetl.filter import Filter
from stetl.packet import FORMAT
from stetl.util import Util
import re

log = Util.get_log("regexfilter")


class RegexFilter(Filter):
    """
    Filter that runs a regular expression over a string and returns the named
    groups as a record.

    consumes=FORMAT.string, produces=FORMAT.record
    """

    # Config attributes: declared through the Config decorator so that the
    # configured properties are available as read-only values.

    @Config(required=True, default=None, ptype=str)
    def pattern_string(self):
        """
        The regex pattern, given as a string. It should contain named groups.
        """
        pass
示例#48
0
# -*- coding: utf-8 -*-
#
# RawSensorLastInput: fetch last raw values from CityGIS/Intemo Raw Sensor REST API.
#
# Author:Just van den Broecke

import time
from datetime import datetime, timedelta
from stetl.component import Config
from stetl.util import Util
from stetl.packet import FORMAT
from smartem.util.utc import zulu_to_gmt
from smartem.rawsensorapi import RawSensorAPIInput

log = Util.get_log("RawSensorAPI")


class RawSensorLastInput(RawSensorAPIInput):
    """
    Fetches the last values for all devices from the Raw Sensor REST API (CityGIS).
    """
    @Config(required=True, default=[], ptype=list)
    def sensor_names(self):
        """
        Names of the output sensors to refine.

        Required: True

        Default: []
        """
示例#49
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Input classes for ETL via GDAL OGR.
#
# Author: Just van den Broecke
#
import subprocess
from stetl.component import Config
from stetl.util import Util, gdal, ogr
from stetl.input import Input
from stetl.packet import FORMAT

log = Util.get_log('ogrinput')


class OgrInput(Input):
    """
    Direct GDAL OGR input via the Python OGR wrapper. Via the Python API (http://gdal.org/python)
    an OGR data source is accessed and the Features are read from each layer.
    Each Layer corresponds to a "doc", so for multi-layer sources the 'end-of-doc' flag is
    set after a Layer has been read.

    This input can read almost any geospatial data format. One can use the features directly
    in a Stetl Filter or use a converter to e.g. convert to GeoJSON structures.

    produces=FORMAT.ogr_feature or FORMAT.ogr_feature_array (all features)
    """

    # Start attribute config meta
    # Applying Decorator pattern with the Config class to provide
示例#50
0
# -*- coding: utf-8 -*-
#
# Smart Emission DB input classes.
#
# Author: Just van den Broecke

from stetl.component import Config
from stetl.util import Util
from stetl.inputs.dbinput import PostgresDbInput

log = Util.get_log("SmartemDbInput")


class RawDbInput(PostgresDbInput):
    """
    Reads raw harvested Smartem JSON data from the timeseries table and converts it to a record list.
    """
    @Config(ptype=str, default=None, required=True)
    def last_gid_query(self):
        """
        Query string that fetches the last gid that was processed.
        """

    @Config(ptype=str, default=None, required=True)
    def gids_query(self):
        """
        Query string that fetches all gids (ids) still to be processed.
        """
示例#51
0
# -*- coding: utf-8 -*-
#
# Output to File classes.
#
# Author: Just van den Broecke
#
from stetl.output import Output
from stetl.util import Util
from stetl.packet import FORMAT

import os

log = Util.get_log('fileoutput')

class FileOutput(Output):
    """
    Writes an etree doc to a file as pretty-printed XML.

    consumes=FORMAT.etree_doc
    """

    def __init__(self, configdict, section):
        Output.__init__(self, configdict, section, consumes=FORMAT.etree_doc)
        working_dir = os.getcwd()
        log.info("working dir %s" % working_dir)

    def write(self, packet):
        """
        Write the packet to the file named by the 'file_path' config value.

        A packet whose data is None is returned untouched; otherwise the
        result of write_file() is returned.
        """
        if packet.data is not None:
            target_path = self.cfg.get('file_path')
            return self.write_file(packet, target_path)

        return packet
示例#52
0
# Packet buffering.
#
# Author:Just van den Broecke

import copy
from stetl.util import Util
from stetl.filter import Filter
from stetl.packet import FORMAT

log = Util.get_log("packetbuffer")


class PacketBuffer(Filter):
    """
    Keeps a copy of every incoming Packet; mainly meant for unit tests that inspect Packets after ETL is done.
    """

    def __init__(self, configdict, section):
        Filter.__init__(self, configdict, section, consumes=FORMAT.any, produces=FORMAT.any)
        # Copies of all Packets seen so far, in arrival order
        self.packet_list = []

    def invoke(self, packet):
        """
        Store a copy of the packet and pass the original packet through.
        """
        # A copy is buffered rather than the packet itself.
        # NOTE(review): copy.copy() is a shallow copy, so the buffered Packet
        # shares its attribute values with the live one; confirm that this is
        # sufficient for packets that get cleared/reused downstream.
        snapshot = copy.copy(packet)
        self.packet_list.append(snapshot)
        return packet
示例#53
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Output classes for ETL.
#
# Author: Just van den Broecke
#
from os import sys, path
from stetl.outputs.httpoutput import HttpOutput
from stetl.util import Util
from stetl.packet import FORMAT
from stetl.component import Config
log = Util.get_log('sosoutput')

class SOSTOutput(HttpOutput):
    """
    Output that uses the SOS-T protocol over plain HTTP.

    consumes=FORMAT.record_array
    """

    @Config(required=True, default='application/json;charset=UTF-8', ptype=str)
    def content_type(self):
        """
        Content type (for template).

        Required: True

        Default: application/json;charset=UTF-8
        """
示例#54
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# POST data via WFS Transactional protocol (WFS-T).
#
# Author: Just van den Broecke
#
from stetl.component import Config
from stetl.output import Output
from stetl.util import Util
from stetl.packet import FORMAT
import httplib

log = Util.get_log('wfsoutput')


class WFSTOutput(Output):
    """
    Inserts features from an etree doc via the WFS-T (WFS Transaction) OGC protocol.

    consumes=FORMAT.etree_doc
    """

    # Config attributes
    @Config(ptype=str, default=None, required=True)
    def wfs_host(self):
        """
        The hostname part of the URL, e.g. geodata.ngr.nl.
        """
示例#55
0
# -*- coding: utf-8 -*-
#
# Extracts a file from a ZIP file, and saves it as the given file name.
#
# Author: Frank Steggink
#
from stetl.component import Config
from stetl.filter import Filter
from stetl.util import Util
from stetl.packet import FORMAT

log = Util.get_log('zipfileextractor')

BUFFER_SIZE = 1024 * 1024 * 1024


class ZipFileExtractor(Filter):
    """
    Filter that extracts a file from a ZIP file and saves it under the given file name.

    consumes=FORMAT.record, produces=FORMAT.string
    """

    # Config attributes
    @Config(required=True, default=None, ptype=str)
    def file_path(self):
        """
        Name of the file the extracted content is written to.
        """
示例#56
0
# Output classes for ETL, executing commands.
#
# Author: Frank Steggink
#
import subprocess
import os
import shutil
from stetl.component import Config
from stetl.output import Output
from stetl.util import Util
from stetl.packet import FORMAT

log = Util.get_log('execoutput')


class ExecOutput(Output):
    """
    Executes any command (abstract base class).
    """

    @Config(ptype=str, default='', required=False)
    def env_args(self):
        """
        Provides of list of environment variables which will be used when executing the given command.

        Example: env_args = pgpassword=postgres othersetting=value~with~spaces
        """
        pass

    @Config(ptype=str, default='=', required=False)
    def env_separator(self):