def cli_load_records(ctx, config, path, recursive, yes, verbosity):
    """Load metadata records from directory or file into repository"""
    # ``ctx`` and ``verbosity`` are accepted but not used in this body.
    settings = parse_ini_config(config)
    static_context = pconfig.StaticContext()

    # Connection string and table name both come from [repository].
    repository = settings['repository']
    load_records(
        static_context,
        repository['database'],
        repository['table'],
        path,
        recursive,
        yes,
    )
def cli_delete_records(ctx, config, yes, verbosity):
    """Delete all records from repository"""
    # ``ctx``, ``yes`` and ``verbosity`` are accepted but not used in this
    # body; in particular ``yes`` is not forwarded to delete_records().
    settings = parse_ini_config(config)
    static_context = pconfig.StaticContext()

    repository = settings['repository']
    delete_records(
        static_context,
        repository['database'],
        repository['table'],
    )
def cli_optimize_db(ctx, config, verbosity):
    """Optimize repository database"""
    # ``ctx`` and ``verbosity`` are accepted but not used in this body.
    settings = parse_ini_config(config)
    static_context = pconfig.StaticContext()

    repository = settings['repository']
    optimize_db(
        static_context,
        repository['database'],
        repository['table'],
    )
def cli_rebuild_db_indexes(ctx, config, verbosity):
    """Rebuild repository database indexes"""
    # ``ctx`` and ``verbosity`` are accepted but not used in this body.
    settings = parse_ini_config(config)
    static_context = pconfig.StaticContext()

    repository = settings['repository']
    rebuild_db_indexes(
        static_context,
        repository['database'],
        repository['table'],
    )
def cli_refresh_harvested_records(ctx, config, verbosity, url):
    """Refresh / harvest non-local records in repository"""
    # ``ctx`` and ``verbosity`` are accepted but not used in this body.
    settings = parse_ini_config(config)
    static_context = pconfig.StaticContext()

    repository = settings['repository']
    refresh_harvested_records(
        static_context,
        repository['database'],
        repository['table'],
        url,
    )
def cli_gen_sitemap(ctx, config, output, verbosity):
    """Generate XML Sitemap"""
    # ``ctx`` and ``verbosity`` are accepted but not used in this body.
    settings = parse_ini_config(config)
    static_context = pconfig.StaticContext()

    # Repository location comes from [repository]; the URL handed to
    # gen_sitemap() is the configured [server] url.
    repository = settings['repository']
    gen_sitemap(
        static_context,
        repository['database'],
        repository['table'],
        settings['server']['url'],
        output,
    )
def cli_export_records(ctx, config, path, verbosity):
    """Dump metadata records from repository into directory"""
    # ``ctx`` and ``verbosity`` are accepted but not used in this body.
    settings = parse_ini_config(config)
    static_context = pconfig.StaticContext()

    repository = settings['repository']
    export_records(
        static_context,
        repository['database'],
        repository['table'],
        path,
    )
def clear_records():
    '''
    Point the registry at the 'test' index, yield, then remove that index
    from the search backend and delete the records from the pycsw database.
    '''
    # Setup: everything before the yield runs first.
    registry.REGISTRY_INDEX_NAME = 'test'

    yield

    # Teardown, search backend: drop the index.
    search_client = rawes.Elastic(registry.REGISTRY_SEARCH_URL)
    search_client.delete(registry.REGISTRY_INDEX_NAME)

    # Teardown, database: remove the records from the repository table.
    static_context = config.StaticContext()
    repository_settings = registry.PYCSW['repository']
    delete_records(
        static_context,
        repository_settings['database'],
        repository_settings['table'],
    )
def __init__(self, *args, **kwargs):
    """Connect to the search backend, then initialise the base repository.

    ``es_status`` stays 400 unless es_connect() returns a response that
    does not contain 'error'; only then are ``es`` and ``catalog`` set.
    ``args`` and ``kwargs`` are accepted but not used in this body.
    """
    self.es_status = 400

    connection = es_connect()
    index_name = REGISTRY_INDEX_NAME

    connected = 'error' not in connection
    if connected:
        self.es_status = 200
        # A successful response unpacks into (client, version).
        self.es, es_version = connection
        self.catalog = get_or_create_index(self.es, es_version, index_name)

    db_url = PYCSW['repository']['database']
    return super(RegistryRepository, self).__init__(
        db_url,
        context=config.StaticContext(),
    )
def test_clear_records(client):
    """Delete the test catalog, purge the database, then delete a missing one."""
    catalog_url = '/catalog/{0}/csw'.format(catalog_slug)
    removal = client.delete(catalog_url)
    assert 200 == removal.status_code
    removal_body = removal.content.decode('utf-8')
    assert 'removed' in removal_body

    # Delete records in pycsw database.
    static_context = config.StaticContext()
    repository_settings = registry.PYCSW['repository']
    delete_records(
        static_context,
        repository_settings['database'],
        repository_settings['table'],
    )

    # Delete a catalog that has not been created previously.
    missing = client.delete('/catalog/boom/csw')
    assert 404 == missing.status_code
    missing_body = missing.content.decode('utf-8')
    assert 'does not exist' in missing_body
def __init__(self, *args, **kwargs):
    """Resolve the catalog from the request URL and connect to the backend.

    When the first positional argument has a ``url`` attribute, ``catalog``
    is parsed from it, or set to None when the URL path is exactly '/csw'.
    ``es_status`` is 200 after a successful es_connect() and 404 when it
    raises a requests ConnectionError. ``kwargs`` is not used in this body.
    """
    if args:
        candidate = args[0]
        if hasattr(candidate, 'url'):
            url = candidate.url
            if urlparse(url).path != '/csw':
                self.catalog = parse_catalog_from_url(url)
            else:
                self.catalog = None

    try:
        self.es, self.version = es_connect(url=REGISTRY_SEARCH_URL)
    except requests.exceptions.ConnectionError:
        self.es_status = 404
    else:
        self.es_status = 200

    db_url = PYCSW['repository']['database']
    return super(RegistryRepository, self).__init__(
        db_url,
        context=config.StaticContext(),
    )
def __init__(self):
    """Initialise every setting to its default and build a StaticContext."""
    # Attributes are created in the same order as before; consecutive
    # None-valued settings are simply grouped into one statement.
    self.COMMAND = self.XML_DIRPATH = self.CFG = None
    self.RECURSIVE = False
    self.OUTPUT_FILE = self.CSW_URL = self.XML = self.XSD = None
    self.TIMEOUT = 30
    self.FORCE_CONFIRM = False
    self.METADATA = self.DATABASE = self.URL = self.HOME = self.TABLE = None
    self.CONTEXT = config.StaticContext()
def __init__(self):
    """Load pycsw settings from ``/etc/pycsw/pycsw.cfg``.

    On success the instance exposes ``context``, ``config``, ``database``,
    ``url``, ``home``, ``metdata`` (the ``metadata:main`` section as a
    dict) and ``table`` (defaulting to 'records' when the option is
    absent).

    Any failure while loading is reported on stdout and ``context`` is
    reset to None; attributes assigned before the failure keep whatever
    value they had.
    """
    try:
        self.context = config.StaticContext()
        config_path = '/etc/pycsw/pycsw.cfg'
        # NOTE(review): SafeConfigParser/readfp are the legacy configparser
        # API; confirm they exist on the interpreter this runs on.
        safe_config = configparser.SafeConfigParser()
        with open(config_path) as f:
            safe_config.readfp(f)
        self.config = safe_config
        self.database = safe_config.get('repository', 'database')
        self.url = safe_config.get('server', 'url')
        self.home = safe_config.get('server', 'home')
        # NOTE(review): attribute name is spelled 'metdata'; kept as-is
        # because callers may already read it under that name.
        self.metdata = dict(safe_config.items('metadata:main'))
        try:
            self.table = safe_config.get('repository', 'table')
        except configparser.NoOptionError:
            self.table = 'records'
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt
        # and SystemExit; only ordinary errors are treated as a load
        # failure now.
        print("[ERROR] failed to load pycsw config")
        self.context = None
def test_load_records(client):
    """Load the sample layers and check both repository and search API counts."""
    test_create_catalog(client)

    repo = registry.RegistryRepository()
    repo.catalog = catalog_slug

    records_xml = etree.fromstring(construct_payload(layers_list=layers_list))
    static_context = config.StaticContext()
    registry.load_records(repo, records_xml, static_context)

    # Provisional hack to refresh documents in elasticsearch.
    search_client = rawes.Elastic(registry.REGISTRY_SEARCH_URL)
    search_client.post('/_refresh')

    # The first element of the query result is compared with the layer count.
    query_result = repo.query('')
    stored_count = int(query_result[0])
    assert len(layers_list) == stored_count

    # Verify records added into elasticsearch using the search api.
    api_response = client.get(catalog_search_api)
    assert 200 == api_response.status_code
    decoded = json.loads(api_response.content.decode('utf-8'))
    assert len(layers_list) == decoded['a.matchDocs']

    test_clear_records(client)
def __init__(self, rtconfig=None, env=None, version='3.0.0'):
    """ Initialize CSW

    :param rtconfig: runtime configuration; a ``SafeConfigParser`` instance
                     is used as-is, a ``dict`` of ``{section: {option:
                     value}}`` is copied into a new parser, and anything
                     else is passed to ``codecs.open`` as a UTF-8 file
    :param env: environment mapping stored as ``self.environ``; when falsy,
                ``os.environ`` is used instead
    :param version: request version; ``'2.0.2'`` selects the ``csw2.Csw2``
                    interface, otherwise ``csw3.Csw3`` is kept

    If the configuration cannot be loaded, ``self.response`` is set to an
    exception report and initialisation stops early.
    """

    if not env:
        self.environ = os.environ
    else:
        self.environ = env

    self.context = config.StaticContext()

    # Lazy load this when needed
    # (it will permanently update global cfg namespaces)
    self.sruobj = None
    self.opensearchobj = None
    self.oaipmhobj = None

    # init kvp
    self.kvp = {}

    self.mode = 'csw'
    # NOTE(review): 'async' is a reserved keyword from Python 3.7 on, so
    # this attribute name only parses on older interpreters.
    self.async = False
    self.soap = False
    self.request = None
    self.exception = False
    self.status = 'OK'
    self.profiles = None
    self.manager = False
    self.outputschemas = {}
    self.mimetype = 'application/xml; charset=UTF-8'
    self.encoding = 'UTF-8'
    self.pretty_print = 0
    self.domainquerytype = 'list'
    self.orm = 'django'
    self.language = {'639_code': 'en', 'text': 'english'}
    self.process_time_start = time()

    # define CSW implementation object (default CSW3)
    self.iface = csw3.Csw3(server_csw=self)
    self.request_version = version

    if self.request_version == '2.0.2':
        self.iface = csw2.Csw2(server_csw=self)
        self.context.set_model('csw')

    # load user configuration
    try:
        LOGGER.info('Loading user configuration')
        if isinstance(rtconfig, SafeConfigParser):  # serialized already
            self.config = rtconfig
        else:
            self.config = SafeConfigParser()
            if isinstance(rtconfig, dict):  # dictionary
                for section, options in rtconfig.items():
                    self.config.add_section(section)
                    for k, v in options.items():
                        self.config.set(section, k, v)
            else:  # configuration file
                import codecs
                with codecs.open(rtconfig, encoding='utf-8') as scp:
                    self.config.readfp(scp)
    except Exception as err:
        LOGGER.exception('Could not load user configuration: %s', err)
        self.response = self.iface.exceptionreport(
            'NoApplicableCode', 'service',
            'Error opening configuration %s' % rtconfig
        )
        # nothing below can run without a configuration
        return

    # set server.home safely
    # TODO: make this more abstract
    self.config.set(
        'server', 'home',
        os.path.dirname(os.path.join(os.path.dirname(__file__), '..'))
    )

    self.context.pycsw_home = self.config.get('server', 'home')
    self.context.url = self.config.get('server', 'url')

    log.setup_logger(self.config)

    LOGGER.info('running configuration %s', rtconfig)
    # raises KeyError when the environment has no QUERY_STRING entry
    LOGGER.debug('QUERY_STRING: %s', self.environ['QUERY_STRING'])

    # set OGC schemas location
    if not self.config.has_option('server', 'ogc_schemas_base'):
        self.config.set('server', 'ogc_schemas_base',
                        self.context.ogc_schemas_base)

    # set mimetype
    if self.config.has_option('server', 'mimetype'):
        self.mimetype = self.config.get('server', 'mimetype').encode()

    # set encoding
    if self.config.has_option('server', 'encoding'):
        self.encoding = self.config.get('server', 'encoding')

    # set domainquerytype
    if self.config.has_option('server', 'domainquerytype'):
        self.domainquerytype = self.config.get('server', 'domainquerytype')

    # set XML pretty print
    if (self.config.has_option('server', 'pretty_print') and
            self.config.get('server', 'pretty_print') == 'true'):
        self.pretty_print = 1

    # set Spatial Ranking option
    # (this flips a module-level flag on util, not an instance attribute)
    if (self.config.has_option('server', 'spatial_ranking') and
            self.config.get('server', 'spatial_ranking') == 'true'):
        util.ranking_enabled = True

    # set language default
    if self.config.has_option('server', 'language'):
        try:
            LOGGER.info('Setting language')
            # only the part before the first '-' is used as the code
            lang_code = self.config.get('server', 'language').split('-')[0]
            self.language['639_code'] = lang_code
            self.language['text'] = self.context.languages[lang_code]
        except Exception as err:
            LOGGER.exception('Could not set language: %s', err)
            pass

    LOGGER.debug('Configuration: %s.', self.config)
    LOGGER.debug('Model: %s.', self.context.model)

    # load user-defined mappings if they exist
    if self.config.has_option('repository', 'mappings'):
        # override default repository mappings
        try:
            import imp
            module = self.config.get('repository', 'mappings')
            if '/' in module:  # filepath
                modulename = '%s' % os.path.splitext(module)[0].replace(
                    os.sep, '.')
                mappings = imp.load_source(modulename, module)
            else:  # dotted name
                mappings = __import__(module, fromlist=[''])
            LOGGER.info('Loading custom repository mappings '
                        'from %s', module)
            self.context.md_core_model = mappings.MD_CORE_MODEL
            self.context.refresh_dc(mappings.MD_CORE_MODEL)
        except Exception as err:
            # unlike the configuration failure above, this does not return:
            # initialisation continues after recording the exception report
            LOGGER.exception('Could not load custom mappings: %s', err)
            self.response = self.iface.exceptionreport(
                'NoApplicableCode', 'service',
                'Could not load repository.mappings')

    # load outputschemas
    LOGGER.info('Loading outputschemas')

    for osch in pycsw.plugins.outputschemas.__all__:
        # __import__ of a dotted name returns the top-level package, hence
        # the attribute walk down to the plugin module on the next line
        output_schema_module = __import__(
            'pycsw.plugins.outputschemas.%s' % osch)
        mod = getattr(output_schema_module.plugins.outputschemas, osch)
        self.outputschemas[mod.NAMESPACE] = mod

    LOGGER.debug('Outputschemas loaded: %s.', self.outputschemas)
    LOGGER.debug('Namespaces: %s', self.context.namespaces)
def __init__(self, rtconfig=None, env=None, version='3.0.0'):
    """ Initialize CSW

    :param rtconfig: runtime configuration; a ``configparser.ConfigParser``
                     instance is used as-is, a ``dict`` of ``{section:
                     {option: value}}`` is copied into a new parser, and
                     anything else is passed to ``codecs.open`` as a UTF-8
                     file
    :param env: environment mapping stored as ``self.environ``; when falsy,
                ``os.environ`` is used instead
    :param version: request version; ``'2.0.2'`` selects the ``csw2.Csw2``
                    interface, otherwise ``csw3.Csw3`` is kept

    If the configuration cannot be loaded, ``self.response`` is set to an
    exception report and initialisation stops early.
    """

    if not env:
        self.environ = os.environ
    else:
        self.environ = env

    self.context = config.StaticContext()

    # Lazy load this when needed
    # (it will permanently update global cfg namespaces)
    self.sruobj = None
    self.opensearchobj = None
    self.oaipmhobj = None

    # init kvp
    self.kvp = {}

    self.mode = 'csw'
    self.asynchronous = False
    self.soap = False
    self.request = None
    self.exception = False
    self.status = 'OK'
    self.profiles = None
    self.manager = False
    self.outputschemas = {}
    self.mimetype = 'application/xml; charset=UTF-8'
    self.encoding = 'UTF-8'
    self.pretty_print = 0
    self.domainquerytype = 'list'
    self.orm = 'django'
    self.language = {'639_code': 'en', 'text': 'english'}
    self.process_time_start = time()
    self.xslts = []

    # define CSW implementation object (default CSW3)
    self.iface = csw3.Csw3(server_csw=self)
    self.request_version = version

    if self.request_version == '2.0.2':
        self.iface = csw2.Csw2(server_csw=self)
        self.context.set_model('csw')

    # load user configuration
    try:
        LOGGER.info('Loading user configuration')
        if isinstance(rtconfig, configparser.ConfigParser):
            # serialized already
            self.config = rtconfig
        else:
            self.config = configparser.ConfigParser(
                interpolation=util.EnvInterpolation())
            if isinstance(rtconfig, dict):  # dictionary
                for section, options in rtconfig.items():
                    self.config.add_section(section)
                    for k, v in options.items():
                        self.config.set(section, k, v)
            else:  # configuration file
                import codecs
                with codecs.open(rtconfig, encoding='utf-8') as scp:
                    self.config.read_file(scp)
    except Exception as err:
        msg = 'Could not load configuration'
        LOGGER.exception('%s %s: %s', msg, rtconfig, err)
        self.response = self.iface.exceptionreport(
            'NoApplicableCode', 'service', msg)
        # nothing below can run without a configuration
        return

    # set server.home safely
    # TODO: make this more abstract
    self.config.set(
        'server', 'home',
        os.path.dirname(os.path.join(os.path.dirname(__file__), '..')))

    # Read the endpoint flags from self.environ rather than the raw ``env``
    # argument: ``env`` defaults to None, and ``'KEY' in None`` raises
    # TypeError.
    if self.environ.get('PYCSW_IS_CSW'):
        self.config.set(
            'server', 'url',
            self.config['server']['url'].rstrip('/') + '/csw')
    if self.environ.get('PYCSW_IS_OPENSEARCH'):
        self.config.set(
            'server', 'url',
            self.config['server']['url'].rstrip('/') + '/opensearch')
        self.mode = 'opensearch'

    self.context.pycsw_home = self.config.get('server', 'home')
    self.context.url = self.config.get('server', 'url')

    log.setup_logger(self.config)

    LOGGER.info('running configuration %s', rtconfig)
    # .get(): a debug statement must not fail when the environment has no
    # QUERY_STRING entry (e.g. os.environ outside a CGI/WSGI request)
    LOGGER.debug('QUERY_STRING: %s', self.environ.get('QUERY_STRING'))

    # set OGC schemas location
    if not self.config.has_option('server', 'ogc_schemas_base'):
        self.config.set('server', 'ogc_schemas_base',
                        self.context.ogc_schemas_base)

    # set mimetype
    if self.config.has_option('server', 'mimetype'):
        self.mimetype = self.config.get('server', 'mimetype').encode()

    # set encoding
    if self.config.has_option('server', 'encoding'):
        self.encoding = self.config.get('server', 'encoding')

    # set domainquerytype
    if self.config.has_option('server', 'domainquerytype'):
        self.domainquerytype = self.config.get('server', 'domainquerytype')

    # set XML pretty print
    if (self.config.has_option('server', 'pretty_print') and
            self.config.get('server', 'pretty_print') == 'true'):
        self.pretty_print = 1

    # set Spatial Ranking option
    # (this flips a module-level flag on util, not an instance attribute)
    if (self.config.has_option('server', 'spatial_ranking') and
            self.config.get('server', 'spatial_ranking') == 'true'):
        util.ranking_enabled = True

    # set language default
    if self.config.has_option('server', 'language'):
        try:
            LOGGER.info('Setting language')
            # only the part before the first '-' is used as the code
            lang_code = self.config.get('server', 'language').split('-')[0]
            self.language['639_code'] = lang_code
            self.language['text'] = self.context.languages[lang_code]
        except Exception as err:
            LOGGER.exception('Could not set language: %s', err)

    LOGGER.debug('Configuration: %s.', self.config)
    LOGGER.debug('Model: %s.', self.context.model)

    # load user-defined mappings if they exist
    custom_mappings_path = self.config.get('repository', 'mappings',
                                           fallback=None)
    if custom_mappings_path is not None:
        md_core_model = util.load_custom_repo_mappings(
            custom_mappings_path)
        if md_core_model is not None:
            self.context.md_core_model = md_core_model
            self.context.refresh_dc(md_core_model)
        else:
            # No exception is active here, so LOGGER.exception() had no
            # traceback to attach, and the '%s' placeholder had no
            # argument; log the offending path as a plain error instead.
            LOGGER.error('Could not load custom mappings: %s',
                         custom_mappings_path)
            self.response = self.iface.exceptionreport(
                'NoApplicableCode', 'service',
                'Could not load repository.mappings')

    # load user-defined max attempt to retry db connection
    # (fallback also covers a missing [repository] section, matching the
    # mappings lookup above; a non-integer value still raises ValueError)
    self.max_retries = self.config.getint('repository', 'max_retries',
                                          fallback=5)

    # load outputschemas
    LOGGER.info('Loading outputschemas')

    for osch in pycsw.plugins.outputschemas.__all__:
        # __import__ of a dotted name returns the top-level package, hence
        # the attribute walk down to the plugin module on the next line
        output_schema_module = __import__(
            'pycsw.plugins.outputschemas.%s' % osch)
        mod = getattr(output_schema_module.plugins.outputschemas, osch)
        self.outputschemas[mod.NAMESPACE] = mod

    LOGGER.debug('Outputschemas loaded: %s.', self.outputschemas)
    LOGGER.debug('Namespaces: %s', self.context.namespaces)

    LOGGER.info('Loading XSLT transformations')

    xslt_defs = [x for x in self.config.sections() if x.startswith('xslt:')]

    for x in xslt_defs:
        LOGGER.debug('Loading XSLT %s', x)
        # The two names are not used further; the unpacking is kept because
        # it rejects (ValueError) a section name that is not of the form
        # 'xslt:<input>,<output>'.
        _input_os, _output_os = x.split(':', 1)[-1].split(',')
        self.xslts.append({x: self.config.get(x, 'xslt')})
"""Load the records found in a directory into the configured pycsw repository."""
import json

from pycsw.core import admin, config

# Settings are read once, at import time, from config.json in the current
# working directory.
with open("config.json", "r") as file_:
    pycsw_config = json.load(file_)

context = config.StaticContext()


def load(records_dir):
    """Hand ``records_dir`` to ``admin.load_records`` for the configured repository."""
    repository = pycsw_config['repository']
    db_url = repository['database']
    table_name = repository['table']
    # NOTE(review): the two trailing True values are positional flags of
    # admin.load_records -- presumably "recursive" and "force"; confirm
    # against the installed pycsw version.
    admin.load_records(context, db_url, table_name, records_dir, True, True)


if __name__ == "__main__":
    load("records")
def setup_db(database, table, home, create_sfsql_tables=True,
             create_plpythonu_functions=True,
             postgis_geometry_column='wkb_geometry', extra_columns=None,
             language='english', mappings_filepath=''):
    """Setup database tables and indexes

    :param database: SQLAlchemy connection string
    :param table: records table name, optionally prefixed as
                  ``schema.table``
    :param home: path appended to ``sys.path`` inside the generated
                 PL/Python functions
    :param create_sfsql_tables: create ``spatial_ref_sys`` and
                                ``geometry_columns``; forced off when
                                PostGIS is detected
    :param create_plpythonu_functions: create the PL/Python helper
                                       functions (PostgreSQL without
                                       PostGIS only)
    :param postgis_geometry_column: name of the native geometry column
                                    added when PostGIS is detected
    :param extra_columns: optional list of ``sqlalchemy.Column`` objects
                          appended to the records table
    :param language: text search configuration used by the FTS trigger
    :param mappings_filepath: file path or dotted module name providing
                              ``MD_CORE_MODEL``; empty means the default
                              model of ``config.StaticContext``

    :raises RuntimeError: when an SQLite database lives in a directory
                          that does not exist
    """

    from sqlalchemy import Column, create_engine, Integer, MetaData, \
        Table, Text, Unicode
    from sqlalchemy.orm import create_session

    LOGGER.info('Creating database %s', database)
    if database.startswith('sqlite'):
        dbtype, filepath = database.split('sqlite:///')
        dirname = os.path.dirname(filepath)
        # A bare filename (sqlite:///records.db) has an empty dirname,
        # meaning the current directory; os.path.exists('') is False, so
        # only check directories that are actually named.
        if dirname and not os.path.exists(dirname):
            raise RuntimeError('SQLite directory %s does not exist' % dirname)

    dbase = create_engine(database)

    # 'schema.table' -> ('schema', 'table'); 'table' -> ('', 'table')
    schema_name, table_name = table.rpartition(".")[::2]

    mdata = MetaData(dbase, schema=schema_name or None)
    create_postgis_geometry = False

    # If PostGIS 2.x detected, do not create sfsql tables.
    if dbase.name == 'postgresql':
        try:
            dbsession = create_session(dbase)
            for row in dbsession.execute('select(postgis_lib_version())'):
                postgis_lib_version = row[0]
            create_sfsql_tables = False
            create_postgis_geometry = True
            LOGGER.info('PostGIS %s detected: Skipping SFSQL tables creation',
                        postgis_lib_version)
        except Exception as err:
            # Best effort: a failing probe is taken to mean "no PostGIS".
            # Was a bare ``except: pass``, which also swallowed
            # KeyboardInterrupt/SystemExit and hid the reason.
            LOGGER.debug('PostGIS not detected: %s', err)

    if create_sfsql_tables:
        LOGGER.info('Creating table spatial_ref_sys')
        srs = Table(
            'spatial_ref_sys', mdata,
            Column('srid', Integer, nullable=False, primary_key=True),
            Column('auth_name', Text),
            Column('auth_srid', Integer),
            Column('srtext', Text)
        )
        srs.create()

        i = srs.insert()
        i.execute(srid=4326, auth_name='EPSG', auth_srid=4326, srtext='GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.01745329251994328,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]')

        LOGGER.info('Creating table geometry_columns')
        geom = Table(
            'geometry_columns', mdata,
            Column('f_table_catalog', Text, nullable=False),
            Column('f_table_schema', Text, nullable=False),
            Column('f_table_name', Text, nullable=False),
            Column('f_geometry_column', Text, nullable=False),
            Column('geometry_type', Integer),
            Column('coord_dimension', Integer),
            Column('srid', Integer, nullable=False),
            Column('geometry_format', Text, nullable=False),
        )
        geom.create()

        i = geom.insert()
        i.execute(f_table_catalog='public', f_table_schema='public',
                  f_table_name=table_name, f_geometry_column='wkt_geometry',
                  geometry_type=3, coord_dimension=2,
                  srid=4326, geometry_format='WKT')

    # abstract metadata information model

    LOGGER.info('Creating table %s', table_name)

    if mappings_filepath:
        import imp
        module = mappings_filepath
        if os.sep in module:  # filepath
            modulename = '%s' % os.path.splitext(module)[0].replace(
                os.sep, '.')
            mappings_module = imp.load_source(modulename, module)
        else:  # dotted name
            mappings_module = __import__(module, fromlist=[''])
        mappings = mappings_module.MD_CORE_MODEL['mappings']
    else:
        context = config.StaticContext()
        mappings = context.md_core_model['mappings']

    records = Table(table_name, mdata)
    for key, col_name in mappings.items():
        if key == 'pycsw:Identifier':
            col = Column(col_name, Text, primary_key=True)
        elif key == 'pycsw:Typename':
            col = Column(col_name, Text, default='csw:Record',
                         nullable=False, index=True)
        elif key == 'pycsw:Schema':
            col = Column(col_name, Text,
                         default='http://www.opengis.net/cat/csw/2.0.2',
                         nullable=False, index=True)
        elif key == 'pycsw:MdSource':
            col = Column(col_name, Text, default='local',
                         nullable=False, index=True)
        elif key == 'pycsw:InsertDate':
            col = Column(col_name, Text, nullable=False, index=True)
        elif key == 'pycsw:XML':
            col = Column(col_name, Unicode, nullable=False)
        elif key == 'pycsw:AnyText':
            col = Column(col_name, Text, nullable=False)
        else:
            col = Column(col_name, Text, index=True)
        # several model keys may map to the same column; add it only once
        # and skip keys mapped to an empty column name
        if not records.columns.has_key(col_name) and col_name != '':
            records.append_column(col)

    # add extra columns that may have been passed via extra_columns
    # extra_columns is a list of sqlalchemy.Column objects
    if extra_columns:
        LOGGER.info('Extra column definitions detected')
        for extra_column in extra_columns:
            LOGGER.info('Adding extra column: %s', extra_column)
            records.append_column(extra_column)

    records.create()

    conn = dbase.connect()

    if create_plpythonu_functions and not create_postgis_geometry:
        if dbase.name == 'postgresql':  # create plpythonu functions within db
            LOGGER.info('Setting plpythonu functions')
            pycsw_home = home
            # The function bodies between $$ ... $$ are Python source and
            # therefore need real line breaks.
            function_get_anytext = '''
        CREATE OR REPLACE FUNCTION get_anytext(xml text)
        RETURNS text
        AS $$
            import sys
            sys.path.append('%s')
            from pycsw.core import util
            return util.get_anytext(xml)
            $$ LANGUAGE plpythonu;
        ''' % pycsw_home
            function_query_spatial = '''
        CREATE OR REPLACE FUNCTION query_spatial(bbox_data_wkt text, bbox_input_wkt text, predicate text, distance text)
        RETURNS text
        AS $$
            import sys
            sys.path.append('%s')
            from pycsw.core import repository
            return repository.query_spatial(bbox_data_wkt, bbox_input_wkt, predicate, distance)
            $$ LANGUAGE plpythonu;
        ''' % pycsw_home
            function_update_xpath = '''
        CREATE OR REPLACE FUNCTION update_xpath(nsmap text, xml text, recprops text)
        RETURNS text
        AS $$
            import sys
            sys.path.append('%s')
            from pycsw.core import repository
            return repository.update_xpath(nsmap, xml, recprops)
            $$ LANGUAGE plpythonu;
        ''' % pycsw_home
            function_get_geometry_area = '''
        CREATE OR REPLACE FUNCTION get_geometry_area(geom text)
        RETURNS text
        AS $$
            import sys
            sys.path.append('%s')
            from pycsw.core import repository
            return repository.get_geometry_area(geom)
            $$ LANGUAGE plpythonu;
        ''' % pycsw_home
            function_get_spatial_overlay_rank = '''
        CREATE OR REPLACE FUNCTION get_spatial_overlay_rank(target_geom text, query_geom text)
        RETURNS text
        AS $$
            import sys
            sys.path.append('%s')
            from pycsw.core import repository
            return repository.get_spatial_overlay_rank(target_geom, query_geom)
            $$ LANGUAGE plpythonu;
        ''' % pycsw_home
            conn.execute(function_get_anytext)
            conn.execute(function_query_spatial)
            conn.execute(function_update_xpath)
            conn.execute(function_get_geometry_area)
            conn.execute(function_get_spatial_overlay_rank)

    if dbase.name == 'postgresql':
        LOGGER.info('Creating PostgreSQL Free Text Search (FTS) GIN index')
        tsvector_fts = "alter table %s add column anytext_tsvector tsvector" % table_name
        conn.execute(tsvector_fts)
        index_fts = "create index %s_fts_gin_idx on %s using gin(anytext_tsvector)" % (table_name, table_name)
        conn.execute(index_fts)
        # This needs to run if records exist "UPDATE records SET anytext_tsvector = to_tsvector('english', anytext)"
        trigger_fts = "create trigger ftsupdate before insert or update on %s for each row execute procedure tsvector_update_trigger('anytext_tsvector', 'pg_catalog.%s', %s)" % (table_name, language, mappings['pycsw:AnyText'])
        conn.execute(trigger_fts)

    if dbase.name == 'postgresql' and create_postgis_geometry:
        # create native geometry column within db
        LOGGER.info('Creating native PostGIS geometry column')
        # NOTE(review): this is a lexicographic string comparison; it
        # would misclassify a two-digit major version such as '10.0'.
        if postgis_lib_version < '2':
            create_column_sql = "SELECT AddGeometryColumn('%s', '%s', 4326, 'POLYGON', 2)" % (table_name, postgis_geometry_column)
        else:
            create_column_sql = "ALTER TABLE %s ADD COLUMN %s geometry(Geometry,4326);" % (table_name, postgis_geometry_column)
        create_insert_update_trigger_sql = '''
DROP TRIGGER IF EXISTS %(table)s_update_geometry ON %(table)s;
DROP FUNCTION IF EXISTS %(table)s_update_geometry();
CREATE FUNCTION %(table)s_update_geometry() RETURNS trigger AS $%(table)s_update_geometry$
BEGIN
    IF NEW.%(bounding_box_column)s IS NULL THEN
        RETURN NEW;
    END IF;
    NEW.%(geometry)s := ST_GeomFromText(NEW.%(bounding_box_column)s,4326);
    RETURN NEW;
END;
$%(table)s_update_geometry$ LANGUAGE plpgsql;

CREATE TRIGGER %(table)s_update_geometry BEFORE INSERT OR UPDATE ON %(table)s
FOR EACH ROW EXECUTE PROCEDURE %(table)s_update_geometry();
    ''' % {'table': table_name, 'geometry': postgis_geometry_column, 'bounding_box_column': mappings['pycsw:BoundingBox']}

        create_spatial_index_sql = 'CREATE INDEX %(table)s_%(geometry)s_idx ON %(table)s USING GIST (%(geometry)s);' \
            % {'table': table_name, 'geometry': postgis_geometry_column}

        conn.execute(create_column_sql)
        conn.execute(create_insert_update_trigger_sql)
        conn.execute(create_spatial_index_sql)
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. # # ================================================================= from six.moves import configparser from six.moves import input import getopt import sys from pycsw.core import admin, config CONTEXT = config.StaticContext() def usage(): """Provide usage instructions""" return ''' NAME pycsw-admin.py - pycsw admin utility SYNOPSIS pycsw-admin.py -c <command> -f <cfg> [-h] [-p /path/to/records] [-r] Available options: -c Command to be performed: - setup_db