class EnvironmentParamsContainer(task.Task):
    '''Holder for the environment-level settings.

    Each setting is declared as a global parameter through luigi's own
    parameter mechanism, so instantiating this class yields an object that
    carries all of the environment values as attributes. Admittedly this is
    a bit of a hack.
    '''

    # TODO(erikbern): would be cleaner if we don't have to read config in global scope
    local_scheduler = parameter.BooleanParameter(
        is_global=True,
        default=False,
        description='Use local scheduling')
    # The default host is looked up in the config when the class body runs.
    scheduler_host = parameter.Parameter(
        is_global=True,
        default=get_config().get('core', 'default-scheduler-host', default='localhost'),
        description='Hostname of machine running remote scheduler')
    scheduler_port = parameter.IntParameter(
        is_global=True,
        default=8082,
        description='Port of remote scheduler api process')
    lock = parameter.BooleanParameter(
        is_global=True,
        default=False,
        description='Do not run if the task is already running')
    lock_pid_dir = parameter.Parameter(
        is_global=True,
        default='/var/tmp/luigi',
        description='Directory to store the pid file')
    workers = parameter.IntParameter(
        is_global=True,
        default=1,
        description='Maximum number of parallel tasks to run')

    @classmethod
    def env_params(cls, override_defaults):
        '''Apply caller-supplied defaults, then build the container.

        :param override_defaults: mapping of global parameter name to the
            value that should become that parameter's default.
        :return: a new instance of this class, created after the overrides
            have been applied.
        '''
        for name, global_param in cls.get_global_params():
            # Parameters the caller did not mention keep their current default.
            if name not in override_defaults:
                continue
            global_param.set_default(override_defaults[name])
        # Instantiating picks up the global params configured above.
        return cls()
class EnvironmentParamsContainer(task.Task):
    '''Keeps track of a bunch of environment params.

    Uses the internal luigi parameter mechanism: every setting below is a
    global parameter, so instantiating this class gives an object with all
    the environment variables set on it. This is arguably a bit of a hack.

    Some parameters carry a ``config_path`` naming a section/option pair;
    presumably this lets the parameter machinery read the value from the
    config file (confirm against ``parameter.Parameter``).
    '''

    local_scheduler = parameter.BooleanParameter(
        is_global=True, default=False,
        description='Use local scheduling')
    scheduler_host = parameter.Parameter(
        is_global=True, default='localhost',
        description='Hostname of machine running remote scheduler',
        config_path=dict(section='core', name='default-scheduler-host'))
    scheduler_port = parameter.IntParameter(
        is_global=True, default=8082,
        description='Port of remote scheduler api process',
        config_path=dict(section='core', name='default-scheduler-port'))
    # NOTE(review): the two adjacent literals below are concatenated without
    # a separating space, so the help text reads "...no_lock)Do not run...".
    lock = parameter.BooleanParameter(
        is_global=True, default=False,
        description='(Deprecated, replaced by no_lock)'
        'Do not run if similar process is already running')
    lock_size = parameter.IntParameter(
        is_global=True, default=1,
        description="Maximum number of workers running the same command")
    no_lock = parameter.BooleanParameter(
        is_global=True, default=False,
        description='Ignore if similar process is already running')
    lock_pid_dir = parameter.Parameter(
        is_global=True, default='/var/tmp/luigi',
        description='Directory to store the pid file')
    workers = parameter.IntParameter(
        is_global=True, default=1,
        description='Maximum number of parallel tasks to run')
    logging_conf_file = parameter.Parameter(
        is_global=True, default=None,
        description='Configuration file for logging',
        config_path=dict(section='core', name='logging_conf_file'))
    module = parameter.Parameter(
        is_global=True, default=None,
        description='Used for dynamic loading of modules'
    )  # see DynamicArgParseInterface

    @classmethod
    def env_params(cls, override_defaults=None):
        '''Set global parameter values from a mapping and build the container.

        :param override_defaults: optional mapping of global parameter name
            to the value to set on it via ``set_global``. ``None`` (the
            default) means no overrides.
        :return: a new instance of this class with the global params set on it.
        '''
        # A None sentinel replaces the former ``{}`` default so that no dict
        # object is shared between calls.
        if override_defaults is None:
            override_defaults = {}

        # Override any global parameter with whatever is in override_defaults
        for param_name, param_obj in cls.get_global_params():
            if param_name in override_defaults:
                param_obj.set_global(override_defaults[param_name])

        return cls()  # instantiate an object with the global params set on it
class EnvironmentParamsContainer(task.Task):
    '''Keeps track of a bunch of environment params.

    Uses the internal luigi parameter mechanism: every setting below is a
    global parameter, so instantiating this class gives an object with all
    the environment variables set on it. This is arguably a bit of a hack.

    Defaults that come from the config file are not read at class-definition
    time; they are installed by ``apply_config_defaults``, which
    ``env_params`` calls before building an instance.
    '''

    local_scheduler = parameter.BooleanParameter(
        is_global=True, default=False,
        description='Use local scheduling')
    # default=None is a placeholder; see apply_config_defaults.
    scheduler_host = parameter.Parameter(
        is_global=True, default=None,
        description='Hostname of machine running remote scheduler')
    # default=None is a placeholder; see apply_config_defaults.
    scheduler_port = parameter.IntParameter(
        is_global=True, default=None,
        description='Port of remote scheduler api process')
    # NOTE(review): the two adjacent literals below are concatenated without
    # a separating space, so the help text reads "...no_lock)Do not run...".
    lock = parameter.BooleanParameter(
        is_global=True, default=True,
        description='(Deprecated, replaced by no_lock)'
        'Do not run if similar process is already running')
    no_lock = parameter.BooleanParameter(
        is_global=True, default=False,
        description='Ignore if similar process is already running')
    lock_pid_dir = parameter.Parameter(
        is_global=True, default='/var/tmp/luigi',
        description='Directory to store the pid file')
    workers = parameter.IntParameter(
        is_global=True, default=1,
        description='Maximum number of parallel tasks to run')
    logging_conf_file = parameter.Parameter(
        is_global=True, default=None,
        description='Configuration file for logging')

    @classmethod
    def apply_config_defaults(cls):
        '''Install config-derived defaults on the scheduler/logging params.

        Reads ``default-scheduler-host``, ``default-scheduler-port`` and
        ``logging_conf_file`` from the ``core`` section, falling back to
        ``'localhost'``, ``8082`` and ``None`` respectively.
        '''
        config = configuration.get_config()

        cls.scheduler_host.set_default(
            config.get('core', 'default-scheduler-host', 'localhost'))
        # config.get presumably returns the raw text from the config file when
        # the option is present (ConfigParser-style -- confirm); coerce so the
        # IntParameter default is an int either way. int() leaves the 8082
        # fallback unchanged.
        cls.scheduler_port.set_default(
            int(config.get('core', 'default-scheduler-port', 8082)))
        cls.logging_conf_file.set_default(
            config.get('core', 'logging_conf_file', None))

    @classmethod
    def env_params(cls, override_defaults):
        '''Apply config and caller-supplied defaults, then build the container.

        :param override_defaults: mapping of global parameter name to the
            value that should become that parameter's default. Applied after
            the config-derived defaults, so it takes precedence over them.
        :return: a new instance of this class with the global params set on it.
        '''
        cls.apply_config_defaults()

        # Override any global parameter with whatever is in override_defaults
        for param_name, param_obj in cls.get_global_params():
            if param_name in override_defaults:
                param_obj.set_default(override_defaults[param_name])

        return cls()  # instantiate an object with the global params set on it
class EnvironmentParamsContainer(task.Task):
    '''Holder for the environment-level settings.

    Each setting is declared as a global parameter through luigi's own
    parameter mechanism, so instantiating this class yields an object that
    carries all of the environment values as attributes. Admittedly this is
    a bit of a hack.
    '''

    # TODO(erikbern): would be cleaner if we don't have to read config in global scope
    local_scheduler = parameter.BooleanParameter(
        is_global=True,
        default=False,
        description='Use local scheduling')
    # The default host is looked up in the config when the class body runs.
    scheduler_host = parameter.Parameter(
        is_global=True,
        default=get_config().get('core', 'default-scheduler-host', default='localhost'),
        description='Hostname of machine running remote scheduler')
    lock = parameter.BooleanParameter(
        is_global=True,
        default=False,
        description='Do not run if the task is already running')
    lock_pid_dir = parameter.Parameter(
        is_global=True,
        default='/var/tmp/luigi',
        description='Directory to store the pid file')
    workers = parameter.IntParameter(
        is_global=True,
        default=1,
        description='Maximum number of parallel tasks to run')