def main():
    """Migrate resource URLs from the old new.data.gouv.fr domain to www.data.gouv.fr.

    Command-line entry point: expects the path of a CKAN configuration file
    and an optional -v/--verbose flag.  Returns 0 on success.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('config', help='path of configuration file')
    parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity')
    args = parser.parse_args()
    logging.basicConfig(
        level=logging.INFO if args.verbose else logging.WARNING,
        stream=sys.stdout)

    # Load the CKAN environment described by the given ini file.
    site_conf = appconfig('config:{}'.format(os.path.abspath(args.config)))
    load_environment(site_conf.global_conf, site_conf.local_conf)

    # Register a mock translator so pylons-dependent code can run outside a
    # real web request.
    registry = Registry()
    registry.prepare()
    registry.register(pylons.translator, MockTranslator())
    plugins.load('synchronous_search')

    # Earlier one-off migrations (etalab2.fr -> data.gouv.fr) that used to
    # live here as commented-out code have been removed; see VCS history.

    # Rewrite http:// URLs on the old domain to the canonical HTTPS domain.
    model.repo.new_revision()
    for resource in model.Session.query(model.Resource).filter(
            model.Resource.url.like('http://new.data.gouv.fr/%'),
            ):
        resource.url = resource.url.replace('http://new.data.gouv.fr/',
            'https://www.data.gouv.fr/')
    model.repo.commit_and_remove()

    # Same rewrite for resources that already used HTTPS on the old domain.
    model.repo.new_revision()
    for resource in model.Session.query(model.Resource).filter(
            model.Resource.url.like('https://new.data.gouv.fr/%'),
            ):
        resource.url = resource.url.replace('https://new.data.gouv.fr/',
            'https://www.data.gouv.fr/')
    model.repo.commit_and_remove()

    return 0
def load_config(config: Any, load_site_user: bool = True):
    """Load the CKAN configuration and return the site user dict (or None).

    Also prepares a CLI-usable test request context so url_for works, and
    primes the routes request config from ckan.site_url.
    """
    conf = _get_config(config)
    assert 'ckan' not in dir()  # otherwise loggers would be disabled
    # The config is loaded; importing ckan is now safe for the first time.
    from ckan.config.environment import load_environment
    load_environment(conf)

    # Keep an internal test request context built from this environment so
    # url_for can be called from the CLI.
    global _cli_test_request_context
    wsgi_app = make_app(conf).apps['flask_app']._wsgi_app
    _cli_test_request_context = wsgi_app.test_request_context()

    registry = Registry()
    registry.prepare()

    site_user = None
    if model.user_table.exists() and load_site_user:
        site_user = logic.get_action('get_site_user')({'ignore_auth': True}, {})

    # Give routes enough information to run url_for.
    site_url = cast(str, conf.get('ckan.site_url', 'http://0.0.0.0'))
    parts = urlparse(site_url)
    request_config = routes.request_config()
    request_config.host = parts.netloc + parts.path
    request_config.protocol = parts.scheme
    return site_user
def main():
    """Delete the 'supplier_id' extra from every forked package ('-fork-' in name)."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('config', help='path of configuration file')
    parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity')
    args = parser.parse_args()
    log_level = logging.INFO if args.verbose else logging.WARNING
    logging.basicConfig(level=log_level, stream=sys.stdout)

    # Load the CKAN environment from the given ini file.
    site_conf = appconfig('config:{}'.format(os.path.abspath(args.config)))
    load_environment(site_conf.global_conf, site_conf.local_conf)

    # Mock translator lets pylons code run outside a web request.
    registry = Registry()
    registry.prepare()
    registry.register(pylons.translator, MockTranslator())
    plugins.load('synchronous_search')

    model.repo.new_revision()
    forked_package_ids = model.Session.query(model.Package.id).filter(
        model.Package.name.like('%-fork-%'))
    supplier_extras = model.Session.query(model.PackageExtra).filter(
        model.PackageExtra.key == 'supplier_id',
        model.PackageExtra.package_id.in_(forked_package_ids),
    )
    for extra in supplier_extras:
        model.Session.delete(extra)
    model.repo.commit_and_remove()
    return 0
def main():
    """Replace the territorial_coverage_granularity value 'france' with 'pays'."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('config', help='path of configuration file')
    parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity')
    args = parser.parse_args()
    logging.basicConfig(
        level=logging.INFO if args.verbose else logging.WARNING,
        stream=sys.stdout)

    # Bring up the CKAN environment and a mock translator for CLI use.
    site_conf = appconfig('config:{}'.format(os.path.abspath(args.config)))
    load_environment(site_conf.global_conf, site_conf.local_conf)
    registry = Registry()
    registry.prepare()
    registry.register(pylons.translator, MockTranslator())
    plugins.load('synchronous_search')

    model.repo.new_revision()
    extras = model.Session.query(model.PackageExtra).filter(
        model.PackageExtra.key == 'territorial_coverage_granularity',
        model.PackageExtra.value == 'france',
    )
    for extra in extras:
        package = extra.package
        # Private or non-active packages are left for a human to fix.
        if package.private or package.state != 'active':
            log.warning(u'Territorial coverage granularity of package {} must be manually corrected'.format(
                package.name))
            continue
        extra.value = 'pays'
    model.repo.commit_and_remove()
    return 0
def make_app(conf, full_stack=True, static_files=True, **app_conf):
    '''
    Initialise both the pylons and flask apps, and wrap them in dispatcher
    middleware.
    '''
    load_environment(conf, app_conf)

    flask_app = make_flask_stack(conf, **app_conf)
    if six.PY2:
        # Python 2 still carries a pylons app; dispatch between the two stacks.
        pylons_app = make_pylons_stack(conf, full_stack, static_files,
                                       **app_conf)
        app = AskAppDispatcherMiddleware({
            'pylons_app': pylons_app,
            'flask_app': flask_app,
        })
    else:
        app = flask_app

    # Expose a test request context built from this configuration so that
    # tests can call url_for.
    global _internal_test_request_context
    _internal_test_request_context = flask_app._wsgi_app.test_request_context()

    return app
def main():
    """Remove the 'supplier_id' extra from packages whose name marks them as forks."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('config', help='path of configuration file')
    parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity')
    args = parser.parse_args()
    logging.basicConfig(
        level=logging.INFO if args.verbose else logging.WARNING,
        stream=sys.stdout)

    # CKAN environment + mock translator for CLI execution.
    site_conf = appconfig('config:{}'.format(os.path.abspath(args.config)))
    load_environment(site_conf.global_conf, site_conf.local_conf)
    registry = Registry()
    registry.prepare()
    registry.register(pylons.translator, MockTranslator())
    plugins.load('synchronous_search')

    model.repo.new_revision()
    query = model.Session.query(model.PackageExtra).filter(
        model.PackageExtra.key == 'supplier_id',
        model.PackageExtra.package_id.in_(
            model.Session.query(model.Package.id).filter(
                model.Package.name.like('%-fork-%'))),
    )
    for package_extra in query:
        model.Session.delete(package_extra)
    model.repo.commit_and_remove()
    return 0
def _load_ckan_environment(ini_path):
    '''Load CKAN environment from the ini file at *ini_path*.'''
    config_path = os.path.abspath(ini_path)
    site_conf = paste.deploy.appconfig('config:' + config_path)
    load_environment(site_conf.global_conf, site_conf.local_conf)
    # Register a translator so translated strings can be rendered outside a
    # web request.
    _register_translator()
def setup_app(command, conf, vars):
    """Place any commands to setup ckan here"""
    # Load the CKAN environment from the parsed paste config first; the
    # import of ckan.model below is deliberately deferred until after this
    # call (presumably the model needs the loaded environment — confirm).
    load_environment(conf.global_conf, conf.local_conf)
    from ckan import model
    log.debug('Creating tables')
    # Create all database tables for a fresh CKAN instance.
    model.repo.create_db()
    log.info('Creating tables: SUCCESS')
def load_config(ini_path):
    """Load CKAN configuration from the ini file at *ini_path*."""
    config_path = os.path.abspath(ini_path)
    # Configure logging from the ini file without killing loggers that were
    # created before this call.
    logging.config.fileConfig(config_path, disable_existing_loggers=False)
    site_conf = paste.deploy.appconfig('config:' + config_path)
    load_environment(site_conf.global_conf, site_conf.local_conf)
    _register_translator()
def main():
    """Migrate resource URLs from new.data.gouv.fr to https://www.data.gouv.fr.

    Command-line entry point: takes the path of a CKAN configuration file and
    an optional -v/--verbose flag.  Returns 0 on success.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('config', help='path of configuration file')
    parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity')
    args = parser.parse_args()
    logging.basicConfig(
        level=logging.INFO if args.verbose else logging.WARNING,
        stream=sys.stdout)

    # Load the CKAN environment described by the given ini file.
    site_conf = appconfig('config:{}'.format(os.path.abspath(args.config)))
    load_environment(site_conf.global_conf, site_conf.local_conf)

    # A mock translator lets pylons-dependent code run outside a web request.
    registry = Registry()
    registry.prepare()
    registry.register(pylons.translator, MockTranslator())
    plugins.load('synchronous_search')

    # Obsolete commented-out migrations (etalab2.fr era) removed; consult
    # version control history if they are ever needed again.

    # Rewrite plain-HTTP URLs on the retired domain.
    model.repo.new_revision()
    for resource in model.Session.query(model.Resource).filter(
            model.Resource.url.like('http://new.data.gouv.fr/%'),
            ):
        resource.url = resource.url.replace('http://new.data.gouv.fr/',
            'https://www.data.gouv.fr/')
    model.repo.commit_and_remove()

    # Rewrite HTTPS URLs on the retired domain as well.
    model.repo.new_revision()
    for resource in model.Session.query(model.Resource).filter(
            model.Resource.url.like('https://new.data.gouv.fr/%'),
            ):
        resource.url = resource.url.replace('https://new.data.gouv.fr/',
            'https://www.data.gouv.fr/')
    model.repo.commit_and_remove()

    return 0
def main(): parser = argparse.ArgumentParser(description = __doc__) parser.add_argument('config', help = 'path of configuration file') parser.add_argument('-t', '--territoria-url', help = 'Territoria URL', required = True) parser.add_argument('-v', '--verbose', action = 'store_true', help = 'increase output verbosity') args = parser.parse_args() # logging.basicConfig(level = logging.DEBUG if args.verbose else logging.WARNING, stream = sys.stdout) logging.basicConfig(level = logging.INFO if args.verbose else logging.WARNING, stream = sys.stdout) site_conf = appconfig('config:{}'.format(os.path.abspath(args.config))) load_environment(site_conf.global_conf, site_conf.local_conf) registry = Registry() registry.prepare() registry.register(pylons.translator, MockTranslator()) plugins.load('synchronous_search') model.repo.new_revision() kind_code_name_by_kind_code = {} for package_extra in model.Session.query(model.PackageExtra).filter( model.PackageExtra.key == 'territorial_coverage', ): if package_extra.value == 'Coutry/FR': kind_code_name = 'Country/FR/FRANCE' elif package_extra.value == 'InternationalOrganization/EU': kind_code_name = 'InternationalOrganization/UE/UNION EUROPEENNE' elif package_extra.value.count('/') == 1: kind_code_name = kind_code_name_by_kind_code.get(package_extra.value) if kind_code_name is None: kind, code = package_extra.value.split('/') try: response = urllib2.urlopen(urlparse.urljoin(args.territoria_url, '/api/v1/territory?{}'.format(urllib.urlencode(dict( code = code, kind = kind, ), doseq = True)))) except urllib2.HTTPError, response: print package_extra.value raise response_dict = json.loads(response.read()) main_postal_distribution = response_dict['data']['main_postal_distribution'] kind_code_name_by_kind_code[package_extra.value] = kind_code_name = u'/'.join([kind, code, main_postal_distribution]) print kind_code_name else: continue package = package_extra.package if package.private or package.state != 'active': log.warning(u'Territorial 
coverage of package {} must be manually corrected'.format(package.name)) continue package_extra.value = kind_code_name
def _load_config(self):
    """Load and apply the CKAN configuration named by self.options.config."""
    from paste.deploy import appconfig
    from ckan.config.environment import load_environment
    if not self.options.config:
        raise self.BadCommand("No config file supplied")
    self.filename = os.path.abspath(self.options.config)
    try:
        fileConfig(self.filename)
    except Exception:
        # Logging setup is best-effort: the ini file may have no usable
        # logging sections.
        pass
    site_conf = appconfig("config:" + self.filename)
    load_environment(site_conf.global_conf, site_conf.local_conf)
def dbsetup(ini_path='../ckan/development.ini'):
    """Load the CKAN environment from *ini_path*.

    Generalized: the configuration path used to be hard-coded; it is now a
    parameter whose default preserves the previous behaviour.  The unused
    function-local import of paste.registry.Registry was removed.

    Raises AssertionError if the resolved path does not exist.
    """
    from paste.script.util.logging_config import fileConfig
    from paste.deploy import appconfig
    filename = os.path.abspath(ini_path)
    if not os.path.exists(filename):
        raise AssertionError('Config filename %r does not exist.' % filename)
    fileConfig(filename)
    conf = appconfig('config:' + filename)
    assert 'ckan' not in dir()  # otherwise loggers would be disabled
    # We have now loaded the config. Now we can import ckan for the
    # first time.
    from ckan.config.environment import load_environment
    load_environment(conf.global_conf, conf.local_conf)
def make_app(conf):
    '''
    Initialise the Flask app and wrap it in dispatcher middleware.
    '''
    load_environment(conf)
    flask_app = make_flask_stack(conf)

    # Keep a test request context built from this configuration available so
    # tests can call url_for.
    global _internal_test_request_context
    wsgi_app = flask_app._wsgi_app
    _internal_test_request_context = wsgi_app.test_request_context()

    return flask_app
def make_app(conf, full_stack=True, static_files=True, **app_conf):
    '''
    Initialise both the pylons and flask apps, and wrap them in dispatcher
    middleware.
    '''
    load_environment(conf, app_conf)

    pylons_app = make_pylons_stack(conf, full_stack, static_files, **app_conf)
    flask_app = make_flask_stack(conf, **app_conf)

    apps_by_name = {
        'pylons_app': pylons_app,
        'flask_app': flask_app,
    }
    return AskAppDispatcherMiddleware(apps_by_name)
def _load_config(self):
    """Load the CKAN config named by self.options.config and register a
    mock translator on a paste registry."""
    from paste.deploy import appconfig
    from ckan.config.environment import load_environment
    if not self.options.config:
        raise self.BadCommand('No config file supplied')
    self.filename = os.path.abspath(self.options.config)
    if not os.path.exists(self.filename):
        raise AssertionError('Config filename %r does not exist.' % self.filename)
    fileConfig(self.filename)
    site_conf = appconfig('config:' + self.filename)
    load_environment(site_conf.global_conf, site_conf.local_conf)
    # Prepare a registry so pylons.translator resolves outside a web request.
    self.registry = Registry()
    self.registry.prepare()
    import pylons
    self.translator_obj = MockTranslator()
    self.registry.register(pylons.translator, self.translator_obj)
def _load_config(self):
    """Load the paste config named by self.options.config and set up a
    registry with a mock translator."""
    from paste.deploy import appconfig
    from ckan.config.environment import load_environment
    if not self.options.config:
        raise self.BadCommand('No config file supplied')
    self.filename = os.path.abspath(self.options.config)
    try:
        fileConfig(self.filename)
    except Exception:
        # Best-effort: ignore ini files without valid logging sections.
        pass
    site_conf = appconfig('config:' + self.filename)
    load_environment(site_conf.global_conf, site_conf.local_conf)
    # Registry + mock translator allow pylons code to run from the CLI.
    self.registry = Registry()
    self.registry.prepare()
    import pylons
    self.translator_obj = MockTranslator()
    self.registry.register(pylons.translator, self.translator_obj)
def load_config(config, load_site_user=True):
    # Load the CKAN configuration, optionally fetch/register the site user,
    # and prime routes so url_for works from the CLI.  Returns the site user
    # dict, or None when the user table is absent or load_site_user is False.
    conf = _get_config(config)
    assert 'ckan' not in dir()  # otherwise loggers would be disabled
    # We have now loaded the config. Now we can import ckan for the
    # first time.
    from ckan.config.environment import load_environment
    load_environment(conf)
    # Set this internal test request context with the configured environment so
    # it can be used when calling url_for from the CLI.
    global _cli_test_request_context
    app = make_app(conf)
    flask_app = app.apps['flask_app']._wsgi_app
    _cli_test_request_context = flask_app.test_request_context()
    registry = Registry()
    registry.prepare()
    import pylons
    registry.register(pylons.translator, MockTranslator())
    site_user = None
    if model.user_table.exists() and load_site_user:
        # If the DB has already been initialized, create and register
        # a pylons context object, and add the site user to it, so the
        # auth works as in a normal web request
        c = pylons.util.AttribSafeContextObj()
        registry.register(pylons.c, c)
        site_user = logic.get_action('get_site_user')({'ignore_auth': True}, {})
        pylons.c.user = site_user['name']
        pylons.c.userobj = model.User.get(site_user['name'])
    ## give routes enough information to run url_for
    parsed = urlparse(conf.get('ckan.site_url', 'http://0.0.0.0'))
    request_config = routes.request_config()
    request_config.host = parsed.netloc + parsed.path
    request_config.protocol = parsed.scheme
    return site_user
def _load_config(self):
    """Parse self.options.config, load CKAN for the first time, and register
    a mock translator."""
    from paste.deploy import appconfig
    if not self.options.config:
        raise self.BadCommand('No config file supplied')
    self.filename = os.path.abspath(self.options.config)
    if not os.path.exists(self.filename):
        raise AssertionError('Config filename %r does not exist.' % self.filename)
    fileConfig(self.filename)
    site_conf = appconfig('config:' + self.filename)
    assert 'ckan' not in dir()  # otherwise loggers would be disabled
    # The config is loaded; importing ckan is now safe for the first time.
    from ckan.config.environment import load_environment
    load_environment(site_conf.global_conf, site_conf.local_conf)
    self.registry = Registry()
    self.registry.prepare()
    import pylons
    self.translator_obj = MockTranslator()
    self.registry.register(pylons.translator, self.translator_obj)
def main():
    """Rename the granularity value 'france' to 'pays' on active public packages."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('config', help='path of configuration file')
    parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity')
    args = parser.parse_args()
    log_level = logging.INFO if args.verbose else logging.WARNING
    logging.basicConfig(level=log_level, stream=sys.stdout)

    # Load the CKAN environment and a mock translator for CLI execution.
    site_conf = appconfig('config:{}'.format(os.path.abspath(args.config)))
    load_environment(site_conf.global_conf, site_conf.local_conf)
    registry = Registry()
    registry.prepare()
    registry.register(pylons.translator, MockTranslator())
    plugins.load('synchronous_search')

    model.repo.new_revision()
    for package_extra in model.Session.query(model.PackageExtra).filter(
            model.PackageExtra.key == 'territorial_coverage_granularity',
            model.PackageExtra.value == 'france',
            ):
        package = package_extra.package
        if package.private or package.state != 'active':
            # Needs a human decision; just flag it.
            log.warning(
                u'Territorial coverage granularity of package {} must be manually corrected'
                .format(package.name))
        else:
            package_extra.value = 'pays'
    model.repo.commit_and_remove()
    return 0
def main():
    """Certify as public service every organization flagged in
    is_public_service_by_organization_name."""
    parser = argparse.ArgumentParser(description = __doc__)
    parser.add_argument('config', help = 'path of configuration file')
    parser.add_argument('-v', '--verbose', action = 'store_true', help = 'increase output verbosity')
    args = parser.parse_args()
    logging.basicConfig(level = logging.INFO if args.verbose else logging.WARNING, stream = sys.stdout)

    # CKAN environment + mock translator for CLI execution.
    site_conf = appconfig('config:{}'.format(os.path.abspath(args.config)))
    load_environment(site_conf.global_conf, site_conf.local_conf)
    registry = Registry()
    registry.prepare()
    registry.register(pylons.translator, MockTranslator())
    plugins.load('synchronous_search')

    model.repo.new_revision()
    for name, is_public_service in sorted(is_public_service_by_organization_name.iteritems()):
        if not is_public_service:
            continue
        organization = model.Session.query(model.Group).filter(
            model.Group.is_organization == True,
            model.Group.name == name,
            ).first()
        if organization is None:
            log.warning(u'Unknown organization: {}'.format(name))
            continue
        # Only certify organizations that are not already certified.
        if organization.certified_public_service is None:
            log.info(u'Certifying "{}" as public service'.format(name))
            certification = etalab_model.CertifiedPublicService()
            certification.organization_id = organization.id
            model.Session.add(certification)
    model.repo.commit_and_remove()
    return 0
def make_app(global_conf, full_stack=True, static_files=True, **app_conf):
    """Create a Pylons WSGI application and return it

    ``global_conf``
        The inherited configuration for this application. Normally from
        the [DEFAULT] section of the Paste ini file.

    ``full_stack``
        Whether this application provides a full WSGI stack (by default,
        meaning it handles its own exceptions and errors). Disable
        full_stack when this application is "managed" by another WSGI
        middleware.

    ``static_files``
        Whether this application serves its own static files; disable
        when another web server is responsible for serving them.

    ``app_conf``
        The application's local configuration. Normally specified in
        the [app:<name>] section of the Paste ini file (where <name>
        defaults to main).
    """
    # Configure the Pylons environment
    load_environment(global_conf, app_conf)

    # The Pylons WSGI app
    app = PylonsApp()

    # Let plugins wrap the app with their own middleware first (innermost).
    for plugin in PluginImplementations(IMiddleware):
        app = plugin.make_middleware(app, config)

    # Routing/Session/Cache Middleware
    app = RoutesMiddleware(app, config['routes.map'])
    app = SessionMiddleware(app, config)
    app = CacheMiddleware(app, config)

    # CUSTOM MIDDLEWARE HERE (filtered by error handling middlewares)
    #app = QueueLogMiddleware(app)

    if asbool(full_stack):
        # Handle Python exceptions
        app = ErrorHandler(app, global_conf, **config['pylons.errorware'])
        # Display error documents for 401, 403, 404 status codes (and
        # 500 when debug is disabled)
        if asbool(config['debug']):
            app = StatusCodeRedirect(app, [400, 404])
        else:
            app = StatusCodeRedirect(app, [400, 404, 500])

    # Initialize repoze.who from its own config file.
    who_parser = WhoConfig(global_conf['here'])
    who_parser.parse(open(app_conf['who.config_file']))

    if asbool(config.get('openid_enabled', 'true')):
        # NOTE(review): this strips the OpenID plugins when openid_enabled is
        # TRUE, which looks inverted — confirm the intended condition.
        from repoze.who.plugins.openid.identification import OpenIdIdentificationPlugin
        who_parser.identifiers = [i for i in who_parser.identifiers if \
                not isinstance(i, OpenIdIdentificationPlugin)]
        who_parser.challengers = [i for i in who_parser.challengers if \
                not isinstance(i, OpenIdIdentificationPlugin)]

    app = PluggableAuthenticationMiddleware(app,
                who_parser.identifiers,
                who_parser.authenticators,
                who_parser.challengers,
                who_parser.mdproviders,
                who_parser.request_classifier,
                who_parser.challenge_decider,
                logging.getLogger('repoze.who'),
                logging.WARN,  # ignored
                who_parser.remote_user_key,
           )

    # Establish the Registry for this application
    app = RegistryManager(app)

    if asbool(static_files):
        # Serve static files
        static_max_age = None if not asbool(config.get('ckan.cache_enabled')) \
            else int(config.get('ckan.static_max_age', 3600))
        static_app = StaticURLParser(config['pylons.paths']['static_files'],
                cache_max_age=static_max_age)
        static_parsers = [static_app, app]

        # Configurable extra static file paths
        extra_static_parsers = []
        for public_path in config.get('extra_public_paths', '').split(','):
            if public_path.strip():
                extra_static_parsers.append(
                    StaticURLParser(public_path.strip(),
                        cache_max_age=static_max_age)
                )
        # Cascade tries each static parser before falling through to the app.
        app = Cascade(extra_static_parsers+static_parsers)

    return app
def make_pylons_stack(conf, full_stack=True, static_files=True, **app_conf):
    """Create a Pylons WSGI application and return it

    ``conf``
        The inherited configuration for this application. Normally from
        the [DEFAULT] section of the Paste ini file.

    ``full_stack``
        Whether this application provides a full WSGI stack (by default,
        meaning it handles its own exceptions and errors). Disable
        full_stack when this application is "managed" by another WSGI
        middleware.

    ``static_files``
        Whether this application serves its own static files; disable
        when another web server is responsible for serving them.

    ``app_conf``
        The application's local configuration. Normally specified in
        the [app:<name>] section of the Paste ini file (where <name>
        defaults to main).
    """
    # Configure the Pylons environment
    load_environment(conf, app_conf)

    # The Pylons WSGI app
    app = pylons_app = CKANPylonsApp()

    # set pylons globals
    app_globals.reset()

    # Let plugins wrap the app first (innermost middleware).
    for plugin in PluginImplementations(IMiddleware):
        app = plugin.make_middleware(app, config)

    # Routing/Session/Cache Middleware
    app = RoutesMiddleware(app, config['routes.map'])
    # we want to be able to retrieve the routes middleware to be able to update
    # the mapper. We store it in the pylons config to allow this.
    config['routes.middleware'] = app
    app = SessionMiddleware(app, config)
    app = CacheMiddleware(app, config)

    # CUSTOM MIDDLEWARE HERE (filtered by error handling middlewares)
    # app = QueueLogMiddleware(app)
    if asbool(config.get('ckan.use_pylons_response_cleanup_middleware', True)):
        app = execute_on_completion(app, config,
                                    cleanup_pylons_response_string)

    # Fanstatic: debug mode disables minification/bundling for easier
    # debugging; production mode caches aggressively.
    if asbool(config.get('debug', False)):
        fanstatic_config = {
            'versioning': True,
            'recompute_hashes': True,
            'minified': False,
            'bottom': True,
            'bundle': False,
        }
    else:
        fanstatic_config = {
            'versioning': True,
            'recompute_hashes': False,
            'minified': True,
            'bottom': True,
            'bundle': True,
        }
    app = Fanstatic(app, **fanstatic_config)

    # Plugins may also contribute error-log middleware; a plugin without the
    # hook is reported but does not abort app construction.
    for plugin in PluginImplementations(IMiddleware):
        try:
            app = plugin.make_error_log_middleware(app, config)
        except AttributeError:
            log.critical('Middleware class {0} is missing the method'
                         'make_error_log_middleware.'
                         .format(plugin.__class__.__name__))

    if asbool(full_stack):
        # Handle Python exceptions
        app = ErrorHandler(app, conf, **config['pylons.errorware'])
        # Display error documents for 400, 403, 404 status codes (and
        # 500 when debug is disabled)
        if asbool(config['debug']):
            app = StatusCodeRedirect(app, [400, 403, 404])
        else:
            app = StatusCodeRedirect(app, [400, 403, 404, 500])

    # Initialize repoze.who from its dedicated configuration file.
    who_parser = WhoConfig(conf['here'])
    who_parser.parse(open(app_conf['who.config_file']))

    app = PluggableAuthenticationMiddleware(
        app,
        who_parser.identifiers,
        who_parser.authenticators,
        who_parser.challengers,
        who_parser.mdproviders,
        who_parser.request_classifier,
        who_parser.challenge_decider,
        logging.getLogger('repoze.who'),
        logging.WARN,  # ignored
        who_parser.remote_user_key
    )

    # Establish the Registry for this application
    app = RegistryManager(app)

    app = common_middleware.I18nMiddleware(app, config)

    if asbool(static_files):
        # Serve static files
        static_max_age = None if not asbool(config.get('ckan.cache_enabled')) \
            else int(config.get('ckan.static_max_age', 3600))

        static_app = StaticURLParser(config['pylons.paths']['static_files'],
                                     cache_max_age=static_max_age)
        static_parsers = [static_app, app]

        # If uploads are configured, serve the storage directory too,
        # creating it if needed (Python 2 except syntax).
        storage_directory = uploader.get_storage_path()
        if storage_directory:
            path = os.path.join(storage_directory, 'storage')
            try:
                os.makedirs(path)
            except OSError, e:
                # errno 17 is file already exists
                if e.errno != 17:
                    raise
            storage_app = StaticURLParser(path, cache_max_age=static_max_age)
            static_parsers.insert(0, storage_app)

        # Configurable extra static file paths
        extra_static_parsers = []
        for public_path in config.get('extra_public_paths', '').split(','):
            if public_path.strip():
                extra_static_parsers.append(
                    StaticURLParser(public_path.strip(),
                                    cache_max_age=static_max_age)
                )
        # NOTE(review): the function ends by rebinding `app` without a
        # `return app` — looks like a truncated/missing return; confirm
        # against the upstream version of this function.
        app = Cascade(extra_static_parsers + static_parsers)
def main():
    # Purge a user and everything that user administers: membership requests,
    # related items, admin'd groups/organizations and packages, and finally
    # the user record itself.  With -d/--dry-run, only logs what it would do.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('config', help='path of configuration file')
    parser.add_argument('user', help='name of email of user')
    parser.add_argument(
        '-d', '--dry-run', action='store_true',
        help="simulate harvesting, don't update CKAN repository")
    parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity')
    args = parser.parse_args()
    logging.basicConfig(
        level=logging.INFO if args.verbose else logging.WARNING,
        stream=sys.stdout)
    site_conf = appconfig('config:{}'.format(os.path.abspath(args.config)))
    load_environment(site_conf.global_conf, site_conf.local_conf)
    registry = Registry()
    registry.prepare()
    registry.register(pylons.translator, MockTranslator())
    plugins.load('synchronous_search')
    # CKAN user names encode emails with '-dot-' / '-at-' substitutions.
    user_name = args.user.lower().replace('.', '-dot-').replace('@', '-at-')
    user = model.Session.query(
        model.User).filter(model.User.name == user_name).one()
    assert user is not None, 'Unknown user: {}'.format(user_name)
    # Drop the user's YouCKAN membership requests.
    for membership_request in model.Session.query(
            youckan_model.MembershipRequest).filter(
            youckan_model.MembershipRequest.user_id == user.id,
            ):
        log.warning(
            u'Deleting membership request: {}'.format(membership_request))
        if not args.dry_run:
            model.Session.delete(membership_request)
    model.Session.commit()
    # Drop the user's "related" items.
    for related in model.Session.query(
            model.Related).filter(model.Related.owner_id == user.id):
        log.warning(u'Deleting related: {}'.format(related))
        if not args.dry_run:
            model.Session.delete(related)
    model.Session.commit()
    if not args.dry_run:
        model.repo.new_revision()
    # Delete every Group or Package the user administers.
    for user_object_role in model.Session.query(model.UserObjectRole).filter(
            model.UserObjectRole.user_id == user.id,
            model.UserObjectRole.role == 'admin',
            ):
        if user_object_role.context == 'Group':
            group = user_object_role.group
            log.warning(u'Deleting group or organization: {}'.format(group))
            if not args.dry_run:
                # Remove the certification row first so the group delete
                # does not violate its foreign key.
                model.Session.query(
                    etalab_model.CertifiedPublicService).filter(
                    etalab_model.CertifiedPublicService.organization_id == group.id,
                    ).delete()
                model.Session.delete(group)
        else:
            assert user_object_role.context == 'Package', 'Unexpected context for role: {}'.format(
                user_object_role.context)
            package = user_object_role.package
            # Delete resource_revision before purging package, to avoid IntegrityError: update or delete on table
            # "resource_group" violates foreign key constraint "resource_revision_resource_group_id_fkey" on table
            # "resource_revision".
            for resource_group in model.Session.query(
                    model.ResourceGroup).filter(
                    model.ResourceGroup.package_id == package.id,
                    ):
                for resource_revision in model.Session.query(
                        model.ResourceRevision).filter(
                        model.ResourceRevision.resource_group_id == resource_group.id,
                        ):
                    if not args.dry_run:
                        log.warning(u'Deleting resource_revision')
                        model.Session.delete(resource_revision)
            # Delete package_relationship_revision before purging package, to avoid IntegrityError: update or
            # delete on table "package" violates foreign key constraint
            # "package_relationship_revision_subject_package_id_fkey" on table "package_relationship_revision".
            for package_relationship_revision in model.Session.query(
                    model.PackageRelationshipRevision).filter(
                    model.PackageRelationshipRevision.subject_package_id == package.id,
                    ):
                if not args.dry_run:
                    log.warning(u'Deleting package_relationship_revision')
                    model.Session.delete(package_relationship_revision)
            for package_relationship_revision in model.Session.query(
                    model.PackageRelationshipRevision).filter(
                    model.PackageRelationshipRevision.object_package_id == package.id,
                    ):
                if not args.dry_run:
                    log.warning(u'Deleting package_relationship_revision')
                    model.Session.delete(package_relationship_revision)
            log.warning(u'Deleting package: {}'.format(package))
            if not args.dry_run:
                model.Session.delete(package)
    if not args.dry_run:
        model.repo.commit_and_remove()
    # Finally, delete the user itself.
    if not args.dry_run:
        log.warning(u'Deleting user: {}'.format(user))
        model.Session.delete(user)
        model.Session.commit()
    return 0
def main():
    """Repair organization/package membership rows in a CKAN database.

    Three passes over the database, committed once at the end:
      1. For every non-deleted package with an owner organization, ensure an
         active ``Member`` row with capacity ``'organization'`` links them.
      2. For every active organization, force capacity ``'organization'`` on
         its active package members (purging members whose package is gone).
      3. Purge duplicate active members (same group, same object).

    Returns 0 on success (process exit code).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('config', help='path of configuration file')
    parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity')
    args = parser.parse_args()
    # logging.basicConfig(level = logging.DEBUG if args.verbose else logging.WARNING, stream = sys.stdout)
    logging.basicConfig(
        level=logging.INFO if args.verbose else logging.WARNING,
        stream=sys.stdout)

    # Boot a minimal CKAN/Pylons environment outside a web request.
    site_conf = appconfig('config:{}'.format(os.path.abspath(args.config)))
    load_environment(site_conf.global_conf, site_conf.local_conf)
    registry = Registry()
    registry.prepare()
    registry.register(pylons.translator, MockTranslator())
    plugins.load('synchronous_search')

    model.repo.new_revision()

    # Pass 1: every live package owned by an organization must have a
    # matching active membership with capacity 'organization'.
    for package in model.Session.query(model.Package).filter(
            model.Package.owner_org != None,
            model.Package.state != 'deleted',
            ):
        organization = model.Session.query(model.Group).get(package.owner_org)
        if organization is None and package.state != 'active':
            log.warning(
                u'Purging package "{}" whose organization is missing'.format(
                    package.name))
            package.purge()
            continue
        assert organization is not None
        assert organization.is_organization
        assert organization.state != 'deleted', str((organization, package))
        member = model.Session.query(model.Member).filter(
            model.Member.group_id == organization.id,
            model.Member.state == 'active',
            model.Member.table_id == package.id,
            ).first()
        if member is None:
            log.warning(
                u'Repairing organization "{}" package "{}" membership'.format(
                    organization.name, package.name))
            # Re-query without the state filter: a membership row may exist
            # in a non-active state and just needs to be revived.
            member = model.Session.query(model.Member).filter(
                model.Member.group_id == organization.id,
                model.Member.table_id == package.id,
                ).first()
            assert member is not None
            if member.capacity != 'organization':
                member.capacity = 'organization'
            member.state = 'active'
            assert member.table_name == 'package'
        else:
            if member.capacity != 'organization':
                log.warning(
                    u'Repairing capacity organization "{}" package "{}" membership'
                    .format(organization, package))
                member.capacity = 'organization'
            assert member.table_name == 'package'
        continue

    # Pass 2: normalize capacity of active package members of each active
    # organization; purge members pointing at packages that no longer exist.
    for organization in model.Session.query(model.Group).filter(
            model.Group.is_organization == True,
            model.Group.state == 'active',
            ):
        for member in model.Session.query(model.Member).filter(
                model.Member.capacity != 'organization',
                model.Member.group_id == organization.id,
                model.Member.state == 'active',
                model.Member.table_name == 'package',
                ):
            package = model.Session.query(model.Package).get(member.table_id)
            if package is None:
                log.warning(
                    u"Purging member of organization {} with capacity {}, whose package doesn't exist"
                    .format(organization.name, member.capacity))
                member.purge()
            else:
                log.warning(
                    u'Repairing capacity organization "{}" package "{}" membership'
                    .format(organization.name, package))
                member.capacity = 'organization'

    # Pass 3: purge duplicate active memberships (keep the first seen per
    # (group_id, table_id) pair).
    member_by_table_id_by_group_id = {}
    for member in model.Session.query(model.Member).filter(
            model.Member.state == 'active',
            ):
        member_by_table_id = member_by_table_id_by_group_id.setdefault(
            member.group_id, {})
        if member.table_id in member_by_table_id:
            log.warning(
                u"Group {} contains several time the same object:\n {}\n {}".
                format(member.group.name, member_by_table_id[member.table_id],
                       member))
            member.purge()
            continue
        member_by_table_id[member.table_id] = member

    # Single commit for all three passes.
    model.repo.commit_and_remove()
    return 0
def make_pylons_stack(conf, full_stack=True, static_files=True, **app_conf):
    """Create a Pylons WSGI application and return it

    ``conf``
        The inherited configuration for this application. Normally from
        the [DEFAULT] section of the Paste ini file.

    ``full_stack``
        Whether this application provides a full WSGI stack (by default,
        meaning it handles its own exceptions and errors). Disable
        full_stack when this application is "managed" by another WSGI
        middleware.

    ``static_files``
        Whether this application serves its own static files; disable
        when another web server is responsible for serving them.

    ``app_conf``
        The application's local configuration. Normally specified in
        the [app:<name>] section of the Paste ini file (where <name>
        defaults to main).

    NOTE(review): no ``return app`` is visible at the end of this function
    in this chunk — confirm against the caller / upstream CKAN middleware.
    """
    # Configure the Pylons environment
    load_environment(conf, app_conf)

    # The Pylons WSGI app
    app = PylonsApp()
    # set pylons globals
    app_globals.reset()

    # Plugins wrap the app first, so they sit innermost in the stack.
    for plugin in PluginImplementations(IMiddleware):
        app = plugin.make_middleware(app, config)

    # Routing/Session/Cache Middleware
    app = RoutesMiddleware(app, config['routes.map'])
    # we want to be able to retrieve the routes middleware to be able to update
    # the mapper. We store it in the pylons config to allow this.
    config['routes.middleware'] = app
    app = SessionMiddleware(app, config)
    app = CacheMiddleware(app, config)

    # CUSTOM MIDDLEWARE HERE (filtered by error handling middlewares)
    # app = QueueLogMiddleware(app)
    if asbool(config.get('ckan.use_pylons_response_cleanup_middleware', True)):
        app = execute_on_completion(app, config,
                                    cleanup_pylons_response_string)

    # Fanstatic: serve/bundle web resources; debug mode disables
    # minification and bundling to keep assets readable.
    if asbool(config.get('debug', False)):
        fanstatic_config = {
            'versioning': True,
            'recompute_hashes': True,
            'minified': False,
            'bottom': True,
            'bundle': False,
        }
    else:
        fanstatic_config = {
            'versioning': True,
            'recompute_hashes': False,
            'minified': True,
            'bottom': True,
            'bundle': True,
        }
    app = Fanstatic(app, **fanstatic_config)

    # Let plugins add error-log middleware; tolerate plugins that predate
    # the make_error_log_middleware hook.
    for plugin in PluginImplementations(IMiddleware):
        try:
            app = plugin.make_error_log_middleware(app, config)
        except AttributeError:
            log.critical('Middleware class {0} is missing the method'
                         'make_error_log_middleware.'.format(
                             plugin.__class__.__name__))

    if asbool(full_stack):
        # Handle Python exceptions
        app = ErrorHandler(app, conf, **config['pylons.errorware'])

        # Display error documents for 400, 403, 404 status codes (and
        # 500 when debug is disabled)
        if asbool(config['debug']):
            app = StatusCodeRedirect(app, [400, 403, 404])
        else:
            app = StatusCodeRedirect(app, [400, 403, 404, 500])

    # Initialize repoze.who
    who_parser = WhoConfig(conf['here'])
    who_parser.parse(open(app_conf['who.config_file']))

    app = PluggableAuthenticationMiddleware(
        app,
        who_parser.identifiers,
        who_parser.authenticators,
        who_parser.challengers,
        who_parser.mdproviders,
        who_parser.request_classifier,
        who_parser.challenge_decider,
        logging.getLogger('repoze.who'),
        logging.WARN,  # ignored
        who_parser.remote_user_key)

    # Establish the Registry for this application
    app = RegistryManager(app)
    app = I18nMiddleware(app, config)

    if asbool(static_files):
        # Serve static files
        static_max_age = None if not asbool(config.get('ckan.cache_enabled')) \
            else int(config.get('ckan.static_max_age', 3600))
        static_app = StaticURLParser(config['pylons.paths']['static_files'],
                                     cache_max_age=static_max_age)
        static_parsers = [static_app, app]

        storage_directory = uploader.get_storage_path()
        if storage_directory:
            path = os.path.join(storage_directory, 'storage')
            try:
                os.makedirs(path)
            except OSError, e:
                # errno 17 is file already exists
                if e.errno != 17:
                    raise
            storage_app = StaticURLParser(path, cache_max_age=static_max_age)
            # Uploaded storage takes priority over bundled static files.
            static_parsers.insert(0, storage_app)

        # Configurable extra static file paths
        extra_static_parsers = []
        for public_path in config.get('extra_public_paths', '').split(','):
            if public_path.strip():
                extra_static_parsers.append(
                    StaticURLParser(public_path.strip(),
                                    cache_max_age=static_max_age))
        app = Cascade(extra_static_parsers + static_parsers)
def main():
    """Mirror www.data.gouv.fr resource files to static.data.gouv.fr.

    Downloads each matching resource, stores it under
    ``/tmp/resources/<sha256[:2]>/<sha256[2:]><ext>`` (content-addressed),
    and — only with ``--go`` — rewrites the resource URL to point at
    ``http://static.data.gouv.fr/``. Works in batches of 5 per revision.

    Returns 0 on success (process exit code).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('config', help='path of configuration file')
    parser.add_argument('-g', '--go', action='store_true', help='Change URLs of files')
    parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity')
    args = parser.parse_args()
    # logging.basicConfig(level = logging.DEBUG if args.verbose else logging.WARNING, stream = sys.stdout)
    logging.basicConfig(
        level=logging.INFO if args.verbose else logging.WARNING,
        stream=sys.stdout)

    # Boot a minimal CKAN/Pylons environment outside a web request.
    site_conf = appconfig('config:{}'.format(os.path.abspath(args.config)))
    load_environment(site_conf.global_conf, site_conf.local_conf)
    registry = Registry()
    registry.prepare()
    registry.register(pylons.translator, MockTranslator())
    plugins.load('synchronous_search')

    # URLs that failed to download; skipped on later iterations so the
    # outer while loop can terminate.
    bad_resources_url = set()
    while True:
        model.repo.new_revision()
        resources_found = False
        resource_index = 0
        for resource in model.Session.query(model.Resource).filter(
                model.Resource.url.like('http://www.data.gouv.fr/%'),
                ):
            # Validate/normalize the URL with the project's conv pipeline;
            # skip resources whose URL doesn't parse.
            resource_url, error = conv.pipe(
                conv.make_input_to_url(full=True),
                conv.not_none,
                )(resource.url, state=conv.default_state)
            if error is not None:
                continue
            resource_url = resource_url.encode('utf-8')
            # Already mirrored: nothing to do.
            if resource_url.startswith(('http://static.data.gouv.fr/',
                                        'https://static.data.gouv.fr/')):
                continue
            if not resource_url.startswith(
                    ('http://www.data.gouv.fr/', 'https://www.data.gouv.fr/')):
                continue
            if resource_url in bad_resources_url:
                continue
            resource_url_path = urlparse.urlsplit(resource_url).path
            print resource_url
            try:
                response = urllib2.urlopen(resource_url, timeout=30)
            except socket.timeout:
                # Timeout is treated as transient: keep the loop alive so
                # the URL is retried on the next pass.
                resources_found = True
                continue
            except urllib2.HTTPError:
                bad_resources_url.add(resource_url)
                continue
            except urllib2.URLError:
                bad_resources_url.add(resource_url)
                continue
            resources_found = True
            resource_buffer = response.read()
            # Content-addressed path: sha256 split as aa/bbbb...<ext>.
            resource_hash = hashlib.sha256(resource_buffer).hexdigest()
            resource_url_path = '{}/{}{}'.format(
                resource_hash[:2], resource_hash[2:],
                os.path.splitext(resource_url_path)[-1])
            resource_path = '/tmp/resources/{}'.format(resource_url_path)
            print ' ', resource_path
            dir = os.path.dirname(resource_path)
            if not os.path.exists(dir):
                os.makedirs(dir)
            with open(resource_path, 'w') as resource_file:
                resource_file.write(resource_buffer)
            if args.go:
                resource.url = 'http://static.data.gouv.fr/{}'.format(
                    resource_url_path)
            resource_index += 1
            # Commit in batches of 5 resources per revision.
            if resource_index >= 5:
                break
        if resources_found:
            model.repo.commit_and_remove()
        else:
            break
    if not args.go:
        print 'WARNING: URLs have not been modified. Transfer images then use the --go option.'
    return 0
def main():
    """Purge soft-deleted CKAN entities and delete unused tags.

    Processes groups/organizations, packages, then resources whose state is
    ``'deleted'``, one entity per revision, retrying past entities that fail
    with an IntegrityError (they are blacklisted and skipped). Finally
    deletes tags no longer referenced by any package.

    Returns 0 on success (process exit code).
    """
    parser = argparse.ArgumentParser(description = __doc__)
    parser.add_argument('config', help = 'path of configuration file')
    parser.add_argument('-v', '--verbose', action = 'store_true', help = 'increase output verbosity')
    args = parser.parse_args()
    # logging.basicConfig(level = logging.DEBUG if args.verbose else logging.WARNING, stream = sys.stdout)
    logging.basicConfig(level = logging.INFO if args.verbose else logging.WARNING, stream = sys.stdout)

    # Boot a minimal CKAN/Pylons environment outside a web request.
    site_conf = appconfig('config:{}'.format(os.path.abspath(args.config)))
    load_environment(site_conf.global_conf, site_conf.local_conf)
    registry = Registry()
    registry.prepare()
    registry.register(pylons.translator, MockTranslator())
    plugins.load('synchronous_search')

    # Purge groups & organizations.
    # One group per iteration/revision; groups whose purge hits an
    # IntegrityError are remembered and excluded from the next query.
    bad_groups_name = []
    while True:
        model.repo.new_revision()
        group = model.Session.query(model.Group).filter(
            model.Group.state == 'deleted',
            sa.not_(model.Group.name.in_(bad_groups_name)) if bad_groups_name else None,
            ).first()
        if group is None:
            break
        name = group.name
        title = group.title
        # Remove the certification row first: it references the group.
        model.Session.query(etalab_model.CertifiedPublicService).filter(
            etalab_model.CertifiedPublicService.organization_id == group.id,
            ).delete()
        group.purge()
        log.info(u'Purged group {} - {}'.format(name, title))
        try:
            model.repo.commit_and_remove()
        except sqlalchemy.exc.IntegrityError:
            log.exception(u'An integrity error while purging {} - {}'.format(name, title))
            bad_groups_name.append(name)

    # Purge packages.
    bad_packages_name = []
    while True:
        model.repo.new_revision()
        package = model.Session.query(model.Package).filter(
            model.Package.state == 'deleted',
            sa.not_(model.Package.name.in_(bad_packages_name)) if bad_packages_name else None,
            ).first()
        if package is None:
            break
        name = package.name
        title = package.title
        # Delete resource_revision before purging package, to avoid IntegrityError: update or delete on table
        # "resource_group" violates foreign key constraint "resource_revision_resource_group_id_fkey" on table
        # "resource_revision".
        for resource_group in model.Session.query(model.ResourceGroup).filter(
                model.ResourceGroup.package_id == package.id,
                ):
            for resource_revision in model.Session.query(model.ResourceRevision).filter(
                    model.ResourceRevision.resource_group_id == resource_group.id,
                    ):
                model.Session.delete(resource_revision)
        # Delete package_relationship_revision before purging package, to avoid IntegrityError: update or
        # delete on table "package" violates foreign key constraint
        # "package_relationship_revision_subject_package_id_fkey" on table "package_relationship_revision".
        for package_relationship_revision in model.Session.query(model.PackageRelationshipRevision).filter(
                model.PackageRelationshipRevision.subject_package_id == package.id,
                ):
            model.Session.delete(package_relationship_revision)
        for package_relationship_revision in model.Session.query(model.PackageRelationshipRevision).filter(
                model.PackageRelationshipRevision.object_package_id == package.id,
                ):
            model.Session.delete(package_relationship_revision)
        package.purge()
        log.info(u'Purged package {} - {}'.format(name, title))
        try:
            model.repo.commit_and_remove()
        except sqlalchemy.exc.IntegrityError:
            log.exception(u'An integrity error while purging {} - {}'.format(name, title))
            bad_packages_name.append(name)

    # Purge resources.
    bad_resources_id = []
    while True:
        model.repo.new_revision()
        resource = model.Session.query(model.Resource).filter(
            model.Resource.state == 'deleted',
            sa.not_(model.Resource.id.in_(bad_resources_id)) if bad_resources_id else None,
            ).first()
        if resource is None:
            break
        id = resource.id
        name = resource.name
        resource.purge()
        log.info(u'Purged resource {} - {}'.format(id, name))
        try:
            model.repo.commit_and_remove()
        except sqlalchemy.exc.IntegrityError:
            log.exception(u'An integrity error while purging {} - {}'.format(id, name))
            bad_resources_id.append(id)

    # Delete unused tags.
    for tag in model.Session.query(model.Tag):
        package_tag = model.Session.query(model.PackageTag).filter(
            model.PackageTag.tag_id == tag.id,
            ).first()
        if package_tag is None:
            model.Session.delete(tag)
            log.info(u'Deleted unused tag {}'.format(tag.name))
    model.Session.commit()
    return 0
def main():
    """Delete a CKAN user and everything they administer.

    Removes the user's membership requests, related items, admin-role
    groups/organizations and packages (with the revision rows that would
    otherwise break foreign keys), and finally the user itself.
    With ``--dry-run``, only logs what would be deleted.

    Returns 0 on success (process exit code).
    """
    parser = argparse.ArgumentParser(description = __doc__)
    parser.add_argument('config', help = 'path of configuration file')
    parser.add_argument('user', help = 'name of email of user')
    parser.add_argument('-d', '--dry-run', action = 'store_true',
        help = "simulate harvesting, don't update CKAN repository")
    parser.add_argument('-v', '--verbose', action = 'store_true', help = 'increase output verbosity')
    args = parser.parse_args()
    # logging.basicConfig(level = logging.DEBUG if args.verbose else logging.WARNING, stream = sys.stdout)
    logging.basicConfig(level = logging.INFO if args.verbose else logging.WARNING, stream = sys.stdout)

    # Boot a minimal CKAN/Pylons environment outside a web request.
    site_conf = appconfig('config:{}'.format(os.path.abspath(args.config)))
    load_environment(site_conf.global_conf, site_conf.local_conf)
    registry = Registry()
    registry.prepare()
    registry.register(pylons.translator, MockTranslator())
    plugins.load('synchronous_search')

    # YouCKAN stores emails mangled into user names ("." -> "-dot-",
    # "@" -> "-at-"); apply the same mangling to locate the user.
    user_name = args.user.lower().replace('.', '-dot-').replace('@', '-at-')
    user = model.Session.query(model.User).filter(model.User.name == user_name).one()
    assert user is not None, 'Unknown user: {}'.format(user_name)

    # Delete the user's membership requests.
    for membership_request in model.Session.query(youckan_model.MembershipRequest).filter(
            youckan_model.MembershipRequest.user_id == user.id,
            ):
        log.warning(u'Deleting membership request: {}'.format(membership_request))
        if not args.dry_run:
            model.Session.delete(membership_request)
            model.Session.commit()

    # Delete the user's related items (reuses).
    for related in model.Session.query(model.Related).filter(model.Related.owner_id == user.id):
        log.warning(u'Deleting related: {}'.format(related))
        if not args.dry_run:
            model.Session.delete(related)
            model.Session.commit()

    if not args.dry_run:
        model.repo.new_revision()
    # Delete every group/organization or package the user administers.
    for user_object_role in model.Session.query(model.UserObjectRole).filter(
            model.UserObjectRole.user_id == user.id,
            model.UserObjectRole.role == 'admin',
            ):
        if user_object_role.context == 'Group':
            group = user_object_role.group
            log.warning(u'Deleting group or organization: {}'.format(group))
            if not args.dry_run:
                # Remove the certification row first: it references the group.
                model.Session.query(etalab_model.CertifiedPublicService).filter(
                    etalab_model.CertifiedPublicService.organization_id == group.id,
                    ).delete()
                model.Session.delete(group)
        else:
            assert user_object_role.context == 'Package', 'Unexpected context for role: {}'.format(
                user_object_role.context)
            package = user_object_role.package
            # Delete resource_revision before purging package, to avoid IntegrityError: update or delete on table
            # "resource_group" violates foreign key constraint "resource_revision_resource_group_id_fkey" on table
            # "resource_revision".
            for resource_group in model.Session.query(model.ResourceGroup).filter(
                    model.ResourceGroup.package_id == package.id,
                    ):
                for resource_revision in model.Session.query(model.ResourceRevision).filter(
                        model.ResourceRevision.resource_group_id == resource_group.id,
                        ):
                    if not args.dry_run:
                        log.warning(u'Deleting resource_revision')
                        model.Session.delete(resource_revision)
            # Delete package_relationship_revision before purging package, to avoid IntegrityError: update or
            # delete on table "package" violates foreign key constraint
            # "package_relationship_revision_subject_package_id_fkey" on table "package_relationship_revision".
            for package_relationship_revision in model.Session.query(model.PackageRelationshipRevision).filter(
                    model.PackageRelationshipRevision.subject_package_id == package.id,
                    ):
                if not args.dry_run:
                    log.warning(u'Deleting package_relationship_revision')
                    model.Session.delete(package_relationship_revision)
            for package_relationship_revision in model.Session.query(model.PackageRelationshipRevision).filter(
                    model.PackageRelationshipRevision.object_package_id == package.id,
                    ):
                if not args.dry_run:
                    log.warning(u'Deleting package_relationship_revision')
                    model.Session.delete(package_relationship_revision)
            log.warning(u'Deleting package: {}'.format(package))
            if not args.dry_run:
                model.Session.delete(package)
    if not args.dry_run:
        model.repo.commit_and_remove()

    # Finally delete the user itself.
    if not args.dry_run:
        log.warning(u'Deleting user: {}'.format(user))
        model.Session.delete(user)
        model.Session.commit()
    return 0
def load_config(filename):
    """Load a CKAN Paste Deploy config file and initialise the environment.

    ``filename`` is the path to the ``.ini`` configuration file. The
    imports are kept local so the module can be imported without CKAN
    installed.
    """
    print('loading ...')
    from paste.deploy import appconfig
    from ckan.config.environment import load_environment

    site_conf = appconfig('config:' + filename)
    load_environment(site_conf.global_conf, site_conf.local_conf)
def main():
    """Export an organization's membership requests and datasets to CSV.

    Writes two files under ``/tmp``:
      * ``<org>-demandes-adhesion.csv`` — membership requests.
      * ``<org>-jeux-de-donnees.csv`` — one row per live dataset with its
        community resources, reuses, alerts, follower count, and Piwik
        page statistics fetched over HTTP.

    Returns 0 on success (process exit code).
    """
    parser = argparse.ArgumentParser(description = __doc__)
    parser.add_argument('organization', help = 'name of organization')
    parser.add_argument('config', help = 'path of configuration file')
    parser.add_argument('-v', '--verbose', action = 'store_true', help = 'increase output verbosity')
    args = parser.parse_args()
    # logging.basicConfig(level = logging.DEBUG if args.verbose else logging.WARNING, stream = sys.stdout)
    logging.basicConfig(level = logging.INFO if args.verbose else logging.WARNING, stream = sys.stdout)
    site_conf = appconfig('config:{}'.format(os.path.abspath(args.config)))
    load_environment(site_conf.global_conf, site_conf.local_conf)

    organization = model.Session.query(model.Group).filter(
        model.Group.name == args.organization,
        model.Group.is_organization == True,
        ).first()
    assert organization is not None

    # CSV 1: membership requests of the organization.
    with open('/tmp/{}-demandes-adhesion.csv'.format(organization.name), 'w') as csv_file:
        csv_writer = csv.writer(csv_file, delimiter = ';', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
        csv_writer.writerow([
            'ID',
            'Nom',
            'Courriel',
            'Statut',
            'Date création',
            'Commentaire',
            'Date gestion',
            'Commentaire de refus',
            ])
        for membership_request in organization.membership_requests:
            user = membership_request.user
            # Python 2: cells must be UTF-8 encoded bytes for csv.writer.
            csv_writer.writerow([
                unicode(cell).encode('utf-8')
                for cell in (
                    user.name,
                    user.fullname,
                    user.email,
                    membership_request.status,
                    membership_request.created,
                    membership_request.comment,
                    membership_request.handled_on,
                    membership_request.refusal_comment,
                    )
                ])

    # CSV 2: one row per active dataset of the organization.
    with open('/tmp/{}-jeux-de-donnees.csv'.format(organization.name), 'w') as csv_file:
        csv_writer = csv.writer(csv_file, delimiter = ';', quotechar = '"', quoting = csv.QUOTE_MINIMAL)
        csv_writer.writerow([
            'URL',
            'Ressources - Titres',
            'Ressources - URL',
            'Réutilisations - Titres',
            'Réutilisations - URL',
            'Alertes - Type',
            'Alertes - Date création',
            'Alertes - Commentaire',
            'Alertes - Date fermeture',
            'Alertes - Commentaire fermeture',
            'Inscrits (Utiles)',
            'Pages vues',
            'Visites',
            'Taux de sortie',
            ])
        for package in model.Session.query(model.Package).filter(
                model.Package.owner_org == organization.id,
                model.Package.state != 'deleted',
                ):
            log.info(package.name)
            row = []
            row.append(u'http://www.data.gouv.fr/fr/dataset/{}'.format(package.name))
            # Community resources: names and URLs, one per line in the cell.
            community_resource_names = []
            community_resource_urls = []
            for community_resource in package.community_resources:
                community_resource_names.append(community_resource.name)
                community_resource_urls.append(community_resource.url)
            row.extend([u'\n'.join(community_resource_names), u'\n'.join(community_resource_urls)])
            # Reuses ("related" items).
            related_titles = []
            related_urls = []
            for related in package.related:
                related_titles.append(related.title)
                related_urls.append(related.url)
            row.extend([u'\n'.join(related_titles), u'\n'.join(related_urls)])
            # Alerts: one column per field, entries joined by newlines.
            alerts_type = []
            alerts_created = []
            alerts_comment = []
            alerts_closed = []
            alerts_close_comment = []
            for alert in package.alerts:
                alerts_type.append(alert.type)
                alerts_created.append(unicode(alert.created))
                alerts_comment.append(alert.comment or u'')
                alerts_closed.append(unicode(alert.closed))
                alerts_close_comment.append(alert.close_comment or u'')
            row.extend([
                u'\n'.join(alerts_type),
                u'\n'.join(alerts_created),
                u'\n'.join(alerts_comment),
                u'\n'.join(alerts_closed),
                u'\n'.join(alerts_close_comment),
                ])
            # Count followers whose user account still exists.
            followers_count = 0
            for following in model.Session.query(model.UserFollowingDataset).filter(
                    model.UserFollowingDataset.object_id == package.id,
                    ):
                user = model.Session.query(model.User).filter(
                    model.User.id == following.follower_id,
                    ).first()
                if user is None:
                    continue
                # user.name, user.fullname, user.email
                followers_count += 1
            row.append(followers_count)
            # Piwik page statistics for the dataset page. piwik_url and
            # weckan_url are module-level settings (not visible here) —
            # presumably the Piwik instance and public site base URLs.
            # The .replace() calls undo the escaping of characters Piwik
            # expects verbatim in its "segment" parameter.
            stats_url = urlparse.urljoin(piwik_url, u'index.php?{}'.format(
                urllib.urlencode((
                    ('date', 'today'),
                    ('format', 'JSON'),
                    ('idSite', '1'),
                    ('method', 'Actions.getPageUrls'),
                    ('module', 'API'),
                    ('period', 'month'),
                    ('segment', 'pageUrl=={}'.format(urlparse.urljoin(weckan_url,
                        'fr/dataset/{}'.format(package.name)))),
                    )),
                )).replace('%2C', ',').replace('%3D%3D', '==').replace('%5B', '[').replace('%5D', ']')
            response = urllib2.urlopen(stats_url)
            response_json = json.loads(response.read())
            if response_json:
                response_json = response_json[0]
                row.extend([response_json['nb_hits'], response_json['nb_visits'], response_json['exit_rate']])
            else:
                row.extend([0, 0, 0])
            csv_writer.writerow([
                unicode(cell).encode('utf-8')
                for cell in row
                ])
    return 0
def main_work_horse(self, job, queue): # This method is called in a worker's work horse process right # after forking. load_environment(config[u'global_conf'], config) return super(Worker, self).main_work_horse(job, queue)
def main():
    """Export an organization's membership requests and datasets to CSV.

    (Reformatted variant of the same export script.) Writes two files
    under ``/tmp``: the organization's membership requests, and one row
    per live dataset with community resources, reuses, alerts, follower
    count, and Piwik page statistics fetched over HTTP.

    Returns 0 on success (process exit code).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('organization', help='name of organization')
    parser.add_argument('config', help='path of configuration file')
    parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity')
    args = parser.parse_args()
    # logging.basicConfig(level = logging.DEBUG if args.verbose else logging.WARNING, stream = sys.stdout)
    logging.basicConfig(
        level=logging.INFO if args.verbose else logging.WARNING,
        stream=sys.stdout)
    site_conf = appconfig('config:{}'.format(os.path.abspath(args.config)))
    load_environment(site_conf.global_conf, site_conf.local_conf)

    organization = model.Session.query(model.Group).filter(
        model.Group.name == args.organization,
        model.Group.is_organization == True,
        ).first()
    assert organization is not None

    # CSV 1: membership requests of the organization.
    with open('/tmp/{}-demandes-adhesion.csv'.format(organization.name),
              'w') as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=';', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow([
            'ID',
            'Nom',
            'Courriel',
            'Statut',
            'Date création',
            'Commentaire',
            'Date gestion',
            'Commentaire de refus',
            ])
        for membership_request in organization.membership_requests:
            user = membership_request.user
            # Python 2: cells must be UTF-8 encoded bytes for csv.writer.
            csv_writer.writerow([
                unicode(cell).encode('utf-8') for cell in (
                    user.name,
                    user.fullname,
                    user.email,
                    membership_request.status,
                    membership_request.created,
                    membership_request.comment,
                    membership_request.handled_on,
                    membership_request.refusal_comment,
                    )
                ])

    # CSV 2: one row per active dataset of the organization.
    with open('/tmp/{}-jeux-de-donnees.csv'.format(organization.name),
              'w') as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=';', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow([
            'URL',
            'Ressources - Titres',
            'Ressources - URL',
            'Réutilisations - Titres',
            'Réutilisations - URL',
            'Alertes - Type',
            'Alertes - Date création',
            'Alertes - Commentaire',
            'Alertes - Date fermeture',
            'Alertes - Commentaire fermeture',
            'Inscrits (Utiles)',
            'Pages vues',
            'Visites',
            'Taux de sortie',
            ])
        for package in model.Session.query(model.Package).filter(
                model.Package.owner_org == organization.id,
                model.Package.state != 'deleted',
                ):
            log.info(package.name)
            row = []
            row.append(u'http://www.data.gouv.fr/fr/dataset/{}'.format(
                package.name))
            # Community resources: names and URLs, one per line in the cell.
            community_resource_names = []
            community_resource_urls = []
            for community_resource in package.community_resources:
                community_resource_names.append(community_resource.name)
                community_resource_urls.append(community_resource.url)
            row.extend([
                u'\n'.join(community_resource_names),
                u'\n'.join(community_resource_urls)
                ])
            # Reuses ("related" items).
            related_titles = []
            related_urls = []
            for related in package.related:
                related_titles.append(related.title)
                related_urls.append(related.url)
            row.extend([u'\n'.join(related_titles), u'\n'.join(related_urls)])
            # Alerts: one column per field, entries joined by newlines.
            alerts_type = []
            alerts_created = []
            alerts_comment = []
            alerts_closed = []
            alerts_close_comment = []
            for alert in package.alerts:
                alerts_type.append(alert.type)
                alerts_created.append(unicode(alert.created))
                alerts_comment.append(alert.comment or u'')
                alerts_closed.append(unicode(alert.closed))
                alerts_close_comment.append(alert.close_comment or u'')
            row.extend([
                u'\n'.join(alerts_type),
                u'\n'.join(alerts_created),
                u'\n'.join(alerts_comment),
                u'\n'.join(alerts_closed),
                u'\n'.join(alerts_close_comment),
                ])
            # Count followers whose user account still exists.
            followers_count = 0
            for following in model.Session.query(
                    model.UserFollowingDataset).filter(
                    model.UserFollowingDataset.object_id == package.id,
                    ):
                user = model.Session.query(model.User).filter(
                    model.User.id == following.follower_id,
                    ).first()
                if user is None:
                    continue
                # user.name, user.fullname, user.email
                followers_count += 1
            row.append(followers_count)
            # Piwik page statistics for the dataset page. piwik_url and
            # weckan_url are module-level settings (not visible here) —
            # presumably the Piwik instance and public site base URLs.
            # The .replace() calls undo the escaping of characters Piwik
            # expects verbatim in its "segment" parameter.
            stats_url = urlparse.urljoin(
                piwik_url,
                u'index.php?{}'.format(
                    urllib.urlencode((
                        ('date', 'today'),
                        ('format', 'JSON'),
                        ('idSite', '1'),
                        ('method', 'Actions.getPageUrls'),
                        ('module', 'API'),
                        ('period', 'month'),
                        ('segment', 'pageUrl=={}'.format(
                            urlparse.urljoin(
                                weckan_url,
                                'fr/dataset/{}'.format(package.name)))),
                        )),
                    )).replace('%2C', ',').replace('%3D%3D', '==').replace(
                        '%5B', '[').replace('%5D', ']')
            response = urllib2.urlopen(stats_url)
            response_json = json.loads(response.read())
            if response_json:
                response_json = response_json[0]
                row.extend([
                    response_json['nb_hits'], response_json['nb_visits'],
                    response_json['exit_rate']
                    ])
            else:
                row.extend([0, 0, 0])
            csv_writer.writerow(
                [unicode(cell).encode('utf-8') for cell in row])
    return 0
def main():
    """Normalize ``territorial_coverage`` package extras to kind/code/name.

    Rewrites extras of the form ``Kind/CODE`` into ``Kind/CODE/NAME`` by
    resolving the territory name against the Territoria API. Packages that
    are private or not active are only logged for manual correction.

    Returns 0 on success (process exit code).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('config', help='path of configuration file')
    parser.add_argument('-t', '--territoria-url', help='Territoria URL', required=True)
    parser.add_argument('-v', '--verbose', action='store_true', help='increase output verbosity')
    args = parser.parse_args()
    # logging.basicConfig(level = logging.DEBUG if args.verbose else logging.WARNING, stream = sys.stdout)
    logging.basicConfig(
        level=logging.INFO if args.verbose else logging.WARNING,
        stream=sys.stdout)

    # Boot a minimal CKAN/Pylons environment outside a web request.
    site_conf = appconfig('config:{}'.format(os.path.abspath(args.config)))
    load_environment(site_conf.global_conf, site_conf.local_conf)
    registry = Registry()
    registry.prepare()
    registry.register(pylons.translator, MockTranslator())
    plugins.load('synchronous_search')

    model.repo.new_revision()
    # Cache of already-resolved "Kind/CODE" -> "Kind/CODE/NAME" values.
    kind_code_name_by_kind_code = {}
    for package_extra in model.Session.query(model.PackageExtra).filter(
            model.PackageExtra.key == 'territorial_coverage',
            ):
        # NOTE(review): 'Coutry/FR' looks like a typo for 'Country/FR' —
        # presumably it matches a misspelled legacy value actually present
        # in the database; confirm before "fixing" the spelling.
        if package_extra.value == 'Coutry/FR':
            kind_code_name = 'Country/FR/FRANCE'
        elif package_extra.value == 'InternationalOrganization/EU':
            kind_code_name = 'InternationalOrganization/UE/UNION EUROPEENNE'
        elif package_extra.value.count('/') == 1:
            # "Kind/CODE" form: resolve the territory's name via Territoria.
            kind_code_name = kind_code_name_by_kind_code.get(
                package_extra.value)
            if kind_code_name is None:
                kind, code = package_extra.value.split('/')
                try:
                    response = urllib2.urlopen(
                        urlparse.urljoin(
                            args.territoria_url,
                            '/api/v1/territory?{}'.format(
                                urllib.urlencode(dict(
                                    code=code,
                                    kind=kind,
                                    ), doseq=True))))
                except urllib2.HTTPError, response:
                    print package_extra.value
                    raise
                response_dict = json.loads(response.read())
                main_postal_distribution = response_dict['data'][
                    'main_postal_distribution']
                kind_code_name_by_kind_code[
                    package_extra.value] = kind_code_name = u'/'.join(
                        [kind, code, main_postal_distribution])
                print kind_code_name
        else:
            # Already in "Kind/CODE/NAME" form (or unrecognized): skip.
            continue
        package = package_extra.package
        if package.private or package.state != 'active':
            log.warning(
                u'Territorial coverage of package {} must be manually corrected'
                .format(package.name))
            continue
        package_extra.value = kind_code_name
def make_app(global_conf, full_stack=True, static_files=True, **app_conf):
    """Create a Pylons WSGI application and return it

    ``global_conf``
        The inherited configuration for this application. Normally from
        the [DEFAULT] section of the Paste ini file.

    ``full_stack``
        Whether this application provides a full WSGI stack (by default,
        meaning it handles its own exceptions and errors). Disable
        full_stack when this application is "managed" by another WSGI
        middleware.

    ``static_files``
        Whether this application serves its own static files; disable
        when another web server is responsible for serving them.

    ``app_conf``
        The application's local configuration. Normally specified in
        the [app:<name>] section of the Paste ini file (where <name>
        defaults to main).
    """
    # Configure the Pylons environment
    load_environment(global_conf, app_conf)

    # The Pylons WSGI app
    app = PylonsApp()

    # Plugins wrap the app first, so they sit innermost in the stack.
    for plugin in PluginImplementations(IMiddleware):
        app = plugin.make_middleware(app, config)

    # Routing/Session/Cache Middleware
    app = RoutesMiddleware(app, config['routes.map'])
    app = SessionMiddleware(app, config)
    app = CacheMiddleware(app, config)

    # CUSTOM MIDDLEWARE HERE (filtered by error handling middlewares)
    #app = QueueLogMiddleware(app)

    if asbool(full_stack):
        # Handle Python exceptions
        app = ErrorHandler(app, global_conf, **config['pylons.errorware'])

        # Display error documents for 400 and 404 status codes (and
        # 500 when debug is disabled)
        if asbool(config['debug']):
            app = StatusCodeRedirect(app, [400, 404])
        else:
            app = StatusCodeRedirect(app, [400, 404, 500])

    # Initialize repoze.who
    who_parser = WhoConfig(global_conf['here'])
    who_parser.parse(open(app_conf['who.config_file']))

    # NOTE(review): this strips the OpenID plugins when openid_enabled is
    # *true*, which looks inverted (upstream CKAN guards the equivalent
    # block with ``not asbool(...)``) — confirm intent before relying on it.
    if asbool(config.get('openid_enabled', 'true')):
        from repoze.who.plugins.openid.identification import OpenIdIdentificationPlugin
        who_parser.identifiers = [i for i in who_parser.identifiers if \
                not isinstance(i, OpenIdIdentificationPlugin)]
        who_parser.challengers = [i for i in who_parser.challengers if \
                not isinstance(i, OpenIdIdentificationPlugin)]

    app = PluggableAuthenticationMiddleware(
        app,
        who_parser.identifiers,
        who_parser.authenticators,
        who_parser.challengers,
        who_parser.mdproviders,
        who_parser.request_classifier,
        who_parser.challenge_decider,
        logging.getLogger('repoze.who'),
        logging.WARN,  # ignored
        who_parser.remote_user_key,
        )

    # Establish the Registry for this application
    app = RegistryManager(app)

    if asbool(static_files):
        # Serve static files
        static_max_age = None if not asbool(config.get('ckan.cache_enabled')) \
            else int(config.get('ckan.static_max_age', 3600))
        static_app = StaticURLParser(config['pylons.paths']['static_files'],
                cache_max_age=static_max_age)
        static_parsers = [static_app, app]

        # Configurable extra static file paths
        extra_static_parsers = []
        for public_path in config.get('extra_public_paths', '').split(','):
            if public_path.strip():
                extra_static_parsers.append(
                    StaticURLParser(public_path.strip(),
                        cache_max_age=static_max_age))
        app = Cascade(extra_static_parsers + static_parsers)

    return app
def make_app(conf, full_stack=True, static_files=True, **app_conf):
    """Create a Pylons WSGI application and return it

    ``conf``
        The inherited configuration for this application. Normally from
        the [DEFAULT] section of the Paste ini file.

    ``full_stack``
        Whether this application provides a full WSGI stack (by default,
        meaning it handles its own exceptions and errors). Disable
        full_stack when this application is "managed" by another WSGI
        middleware.

    ``static_files``
        Whether this application serves its own static files; disable
        when another web server is responsible for serving them.

    ``app_conf``
        The application's local configuration. Normally specified in
        the [app:<name>] section of the Paste ini file (where <name>
        defaults to main).

    """
    # Configure the Pylons environment
    load_environment(conf, app_conf)

    # The Pylons WSGI app
    app = PylonsApp()
    # set pylons globals
    app_globals.reset()

    # Let plugins wrap the base app in their own middleware.
    for plugin in PluginImplementations(IMiddleware):
        app = plugin.make_middleware(app, config)

    # Routing/Session/Cache Middleware
    app = RoutesMiddleware(app, config['routes.map'])
    # we want to be able to retrieve the routes middleware to be able to update
    # the mapper. We store it in the pylons config to allow this.
    config['routes.middleware'] = app
    app = SessionMiddleware(app, config)
    app = CacheMiddleware(app, config)

    # CUSTOM MIDDLEWARE HERE (filtered by error handling middlewares)
    #app = QueueLogMiddleware(app)

    # Fanstatic: serve/bundle static resources. In debug mode hashes are
    # recomputed and assets stay unminified/unbundled for easier debugging.
    if asbool(config.get('debug', False)):
        fanstatic_config = {
            'versioning': True,
            'recompute_hashes': True,
            'minified': False,
            'bottom': True,
            'bundle': False,
        }
    else:
        fanstatic_config = {
            'versioning': True,
            'recompute_hashes': False,
            'minified': True,
            'bottom': True,
            'bundle': True,
        }
    app = Fanstatic(app, **fanstatic_config)

    if asbool(full_stack):
        # Handle Python exceptions
        app = ErrorHandler(app, conf, **config['pylons.errorware'])

        # Display error documents for 401, 403, 404 status codes (and
        # 500 when debug is disabled)
        if asbool(config['debug']):
            app = StatusCodeRedirect(app, [400, 404])
        else:
            app = StatusCodeRedirect(app, [400, 404, 500])

    # Initialize repoze.who from the who.ini file referenced by the app config.
    who_parser = WhoConfig(conf['here'])
    who_parser.parse(open(app_conf['who.config_file']))

    if asbool(config.get('openid_enabled', 'true')):
        from repoze.who.plugins.openid.identification import OpenIdIdentificationPlugin
        # Monkey patches for repoze.who.openid
        # Fixes #1659 - enable log-out when CKAN mounted at non-root URL
        from ckan.lib import repoze_patch
        OpenIdIdentificationPlugin.identify = repoze_patch.identify
        OpenIdIdentificationPlugin.redirect_to_logged_in = repoze_patch.redirect_to_logged_in
        OpenIdIdentificationPlugin._redirect_to_loginform = repoze_patch._redirect_to_loginform
        OpenIdIdentificationPlugin.challenge = repoze_patch.challenge

        # NOTE(review): stripping the OpenID plugins here, in the *enabled*
        # branch, looks contradictory with the monkey patching just above
        # (the patched plugin instances are immediately removed). Presumably
        # these two list filters belong in an ``else:`` branch -- confirm
        # against upstream history before changing.
        who_parser.identifiers = [i for i in who_parser.identifiers if
                                  not isinstance(i, OpenIdIdentificationPlugin)]
        who_parser.challengers = [i for i in who_parser.challengers if
                                  not isinstance(i, OpenIdIdentificationPlugin)]

    app = PluggableAuthenticationMiddleware(app,
        who_parser.identifiers,
        who_parser.authenticators,
        who_parser.challengers,
        who_parser.mdproviders,
        who_parser.request_classifier,
        who_parser.challenge_decider,
        logging.getLogger('repoze.who'),
        logging.WARN,  # ignored
        who_parser.remote_user_key,
    )

    # Establish the Registry for this application
    app = RegistryManager(app)

    # Locale negotiation / i18n handling.
    app = I18nMiddleware(app, config)

    if asbool(static_files):
        # Serve static files; cache headers only when ckan.cache_enabled.
        static_max_age = None if not asbool(config.get('ckan.cache_enabled')) \
            else int(config.get('ckan.static_max_age', 3600))

        static_app = StaticURLParser(config['pylons.paths']['static_files'],
                                     cache_max_age=static_max_age)
        static_parsers = [static_app, app]

        # Configurable extra static file paths
        extra_static_parsers = []
        for public_path in config.get('extra_public_paths', '').split(','):
            if public_path.strip():
                extra_static_parsers.append(
                    StaticURLParser(public_path.strip(),
                                    cache_max_age=static_max_age)
                )
        app = Cascade(extra_static_parsers + static_parsers)

    # Page cache
    if asbool(config.get('ckan.page_cache_enabled')):
        app = PageCacheMiddleware(app, config)

    # Tracking
    if asbool(config.get('ckan.tracking_enabled', 'false')):
        app = TrackingMiddleware(app, config)

    return app
def main(): parser = argparse.ArgumentParser(description = __doc__) parser.add_argument('config', help = 'path of configuration file') parser.add_argument('-g', '--go', action = 'store_true', help = 'Change URLs of files') parser.add_argument('-v', '--verbose', action = 'store_true', help = 'increase output verbosity') args = parser.parse_args() # logging.basicConfig(level = logging.DEBUG if args.verbose else logging.WARNING, stream = sys.stdout) logging.basicConfig(level = logging.INFO if args.verbose else logging.WARNING, stream = sys.stdout) site_conf = appconfig('config:{}'.format(os.path.abspath(args.config))) load_environment(site_conf.global_conf, site_conf.local_conf) registry = Registry() registry.prepare() registry.register(pylons.translator, MockTranslator()) plugins.load('synchronous_search') bad_resources_url = set() while True: model.repo.new_revision() resources_found = False resource_index = 0 for resource in model.Session.query(model.Resource).filter( model.Resource.url.like('http://www.data.gouv.fr/%'), ): resource_url, error = conv.pipe( conv.make_input_to_url(full = True), conv.not_none, )(resource.url, state = conv.default_state) if error is not None: continue resource_url = resource_url.encode('utf-8') if resource_url.startswith(('http://static.data.gouv.fr/', 'https://static.data.gouv.fr/')): continue if not resource_url.startswith(('http://www.data.gouv.fr/', 'https://www.data.gouv.fr/')): continue if resource_url in bad_resources_url: continue resource_url_path = urlparse.urlsplit(resource_url).path print resource_url try: response = urllib2.urlopen(resource_url, timeout = 30) except socket.timeout: resources_found = True continue except urllib2.HTTPError: bad_resources_url.add(resource_url) continue except urllib2.URLError: bad_resources_url.add(resource_url) continue resources_found = True resource_buffer = response.read() resource_hash = hashlib.sha256(resource_buffer).hexdigest() resource_url_path = '{}/{}{}'.format(resource_hash[:2], 
resource_hash[2:], os.path.splitext(resource_url_path)[-1]) resource_path = '/tmp/resources/{}'.format(resource_url_path) print ' ', resource_path dir = os.path.dirname(resource_path) if not os.path.exists(dir): os.makedirs(dir) with open(resource_path, 'w') as resource_file: resource_file.write(resource_buffer) if args.go: resource.url = 'http://static.data.gouv.fr/{}'.format(resource_url_path) resource_index += 1 if resource_index >= 5: break if resources_found: model.repo.commit_and_remove() else: break if not args.go: print 'WARNING: URLs have not been modified. Transfer images then use the --go option.' return 0