def __init__(self, configuration):
    """Build the input, the optional parser and the output for this object.

    ``configuration['input']`` is mandatory; its ``type`` picks the reader
    (``command``, ``file``, ``http``, ``class`` or ``tailer``).
    ``configuration['parser']`` and ``configuration['metadata']`` are
    optional. The tailer settings and the output settings are read from
    the non-local name ``config``, not from ``configuration``.

    Raises:
        AttributeError: the input type is ``tailer`` but ``config`` has
            no ``tailer`` entry.
        AssertionError: no truthy input object was produced (for
            example, the input type matched none of the known values).
    """
    self.config = configuration
    self.input = None
    self.parser = None
    self.metadata = None

    # Input is required
    input_cfg = self.config['input']
    input_args = input_cfg.get('arguments', {})
    input_type = input_cfg['type']
    if input_type == 'command':
        self.input = CommandRunner(**input_args)
    elif input_type == 'file':
        self.input = FileReader(**input_args)
    elif input_type == 'http':
        self.input = HTTPReader(**input_args)
    elif input_type == 'class':
        self.input = init_object(input_cfg['name'], **input_args)
    elif input_type == 'tailer':
        if 'tailer' not in config:
            raise AttributeError(
                "Missing tailer in config file for tailer type input")
        self.input = Tailer(config['tailer'])
    assert self.input

    # The parser is optional; it post-processes what the input collected.
    if 'parser' in self.config:
        parser_cfg = self.config['parser']
        parser_args = parser_cfg.get('arguments', {})
        parser_type = parser_cfg['type']
        if parser_type == 'match':
            self.parser = MatchParser(
                parser_cfg['pattern'].strip(),
                parser_cfg['transform'].strip())
        elif parser_type == 'split':
            self.parser = SplitParser(
                parser_cfg['delimiter'].strip(),
                parser_cfg['transform'].strip())
        elif parser_type == 'dummy':
            self.parser = DummyParser()
        elif parser_type == 'json':
            self.parser = JsonGrepParser(**parser_args)
        elif parser_type == 'class':
            self.parser = init_object(parser_cfg['name'], **parser_args)

    # Bookkeeping; -1 is the default when max_error_count is not configured.
    self._max_error_count = self.config['input'].get('max_error_count', -1)
    self._current_data = None
    self._number_collected = 0
    self._number_failed = 0
    self._error_count = 0

    self._output = create_output(config['output'])

    if 'metadata' in self.config:
        self.metadata = self.config['metadata']
def __init__(self, collector_name, config, output, tailer):
    """Initialise a collector thread named ``collector_name``.

    Args:
        collector_name: used as the thread name and stored on the instance.
        config: mapping with a mandatory ``input`` section and an optional
            ``parser`` section.
        output: stored as-is as the collector's output.
        tailer: used directly as the input when the input type is
            ``tailer``.

    If the input type matches none of ``command``, ``file``, ``http``,
    ``class`` or ``tailer``, the input is left as ``None``; likewise the
    parser stays ``None`` for an unknown or absent parser section.
    """
    threading.Thread.__init__(self, name=collector_name)
    self.__collector_name = collector_name
    self.__config = config

    source_cfg = self.__config['input']
    self.__sleep_time = source_cfg.get('frequency', 10)
    self.__cron = source_cfg.get('schedule', None)
    self.__schedule = None
    if self.__cron is not None:
        self.__schedule = CronEvent(self.__cron)
        log.debug("job scheduled at %s" % self.__schedule.numerical_tab)

    self.__input = None
    self.__parser = None
    self.__output = output

    # Pick the input implementation from the configured type.
    source_type = source_cfg['type']
    if source_type == 'command':
        self.__input = CommandRunner(source_cfg['source'])
    elif source_type == 'file':
        self.__input = FileReader(source_cfg['path'])
    elif source_type == 'http':
        self.__input = HTTPReader(
            source_cfg['url'],
            source_cfg.get('headers', {}),
            source_cfg.get('auth', None))
    elif source_type == 'class':
        self.__input = init_object(
            source_cfg['name'], **source_cfg.get('arguments', {}))
    elif source_type == 'tailer':
        self.__input = tailer

    # The parser section is optional.
    if 'parser' in self.__config:
        parser_cfg = self.__config['parser']
        parser_type = parser_cfg['type']
        if parser_type == 'match':
            self.__parser = MatchParser(
                parser_cfg['pattern'].strip(),
                parser_cfg['transform'].strip())
        elif parser_type == 'split':
            self.__parser = SplitParser(
                parser_cfg['delimiter'].strip(),
                parser_cfg['transform'].strip())
        elif parser_type == 'dummy':
            self.__parser = DummyParser()
        elif parser_type == 'json':
            self.__parser = JsonGrepParser(**parser_cfg.get('arguments', {}))
        elif parser_type == 'class':
            self.__parser = init_object(
                parser_cfg['name'], **parser_cfg.get('arguments', {}))

    # Run-time state; -1 is the default when max_error_count is not configured.
    self.__running = True
    self.__session_id = str(uuid.uuid4())
    self.__max_error_count = source_cfg.get('max_error_count', -1)
    self.__current_data = None
    self.__number_collected = 0
    self.__number_failed = 0
    self.__sleep_count = 0
    self.__error_count = 0
    self.__last_check_minute = -1
def init(self):
    """Validate the non-local ``config`` and instantiate what it describes.

    Applies the ``global`` section through ``set_global`` when present,
    builds one output object per entry of ``config['output']`` into
    ``self.__outputs``, checks the ``pusher`` section, and creates the
    tailer when a ``tailer`` section exists.

    Output entries named ``buffer``, ``kafka-http`` and ``file`` map to
    built-in output classes; any other entry is instantiated through
    ``init_object`` only if it carries a ``class`` key, and is otherwise
    skipped.

    Returns:
        bool: ``False`` when the buffer output has no ``directory``, when
        that path exists but is not a directory, or when the pusher
        section lacks ``directory`` or ``output``; ``True`` otherwise.
    """
    if 'global' in config:
        set_global(config['global'])

    # .items() rather than .iteritems(): the latter exists only on
    # Python 2 dicts, while .items() iterates the same pairs on 2 and 3.
    for name, cfg in config['output'].items():
        if name == 'buffer':
            if 'directory' not in cfg:
                print("ERROR: buffer directory not specified in config.")
                return False
            buffer_dir = cfg['directory']
            if os.path.exists(buffer_dir) and not os.path.isdir(buffer_dir):
                print("ERROR: buffer directory exists but it is not a directory.")
                return False
            if not os.path.exists(buffer_dir):
                log.info("Creating buffer directory %s." % buffer_dir)
                os.makedirs(buffer_dir)
            self.__outputs[name] = BufferOutput(cfg)
        elif name == 'kafka-http':
            self.__outputs[name] = KafkaHTTPOutput(cfg)
        elif name == 'file':
            self.__outputs[name] = FileOutput(cfg)
        elif 'class' in cfg:
            arguments = cfg.get('arguments', {})
            self.__outputs[name] = init_object(cfg['class'], **arguments)

    if 'pusher' in config:
        pusher_cfg = config['pusher']
        if 'directory' not in pusher_cfg or 'output' not in pusher_cfg:
            print("ERROR: need to speficity directory and output in pusher.")
            return False

    if 'tailer' in config:
        self.__tailer = Tailer(config['tailer'])
    return True
def create_output(config):
    """Instantiate the output class named by ``config['class']``.

    Unlike input classes, every output class receives a single positional
    argument: the ``arguments`` mapping itself (an empty dict when the
    configuration has none), not its expansion into keyword arguments.
    """
    output_args = config['arguments'] if 'arguments' in config else {}
    return init_object(config['class'], output_args)
def create_input(input_config, **kwargs):
    """Instantiate the input class described by ``input_config``.

    Only class-type inputs are supported. The class name is taken from
    ``input_config['class']`` when present, otherwise from
    ``input_config['name']`` (in which case ``input_config['type']`` must
    be ``'class'``).

    Args:
        input_config: mapping describing the input.
        **kwargs: constructor arguments supplied by the caller. When any
            are given they are used as-is; when none are given, the
            ``arguments`` entry of ``input_config`` (if any) is used.

    Returns:
        Whatever ``init_object`` returns for the resolved class name.

    Raises:
        AssertionError: no ``class`` key and ``type`` is not ``'class'``.
    """
    if 'class' in input_config:
        class_name = input_config['class']
    else:
        assert input_config['type'] == 'class'
        class_name = input_config['name']

    # ``**kwargs`` always binds a dict (possibly empty), never None, so the
    # fallback to the configured arguments has to test for emptiness.
    if not kwargs:
        kwargs = input_config.get('arguments') or {}
    return init_object(class_name, **kwargs)
def __init__(self, collector_name, config, output, tailer=None):
    """Initialise a collector thread named ``collector_name``.

    Args:
        collector_name: used as the thread name and stored on the instance.
        config: mapping with a mandatory ``input`` section and an optional
            ``parser`` section.
        output: stored as-is as the collector's output.
        tailer: used directly as the input when the input type is
            ``tailer``; must not be ``None`` in that case.

    Raises:
        AttributeError: the input type is ``tailer`` but ``tailer`` is
            ``None``.
        AssertionError: no truthy input object was produced (for
            example, the input type matched none of the known values).
    """
    threading.Thread.__init__(self, name=collector_name)
    self.__collector_name = collector_name
    self.__config = config

    source_cfg = self.__config['input']
    self.__sleep_time = source_cfg.get('frequency', 10)
    self.__cron = source_cfg.get('schedule', None)
    self.__schedule = None
    if self.__cron is not None:
        self.__schedule = CronEvent(self.__cron)
        log.debug("job scheduled at %s" % self.__schedule.numerical_tab)

    self.__input = None
    self.__parser = None
    self.__output = output

    # Pick the input implementation from the configured type.
    source_type = source_cfg['type']
    if source_type == 'command':
        self.__input = CommandRunner(source_cfg['source'])
    elif source_type == 'file':
        self.__input = FileReader(source_cfg['path'])
    elif source_type == 'http':
        self.__input = HTTPReader(
            source_cfg['url'],
            source_cfg.get('headers', {}),
            source_cfg.get('auth', None))
    elif source_type == 'class':
        self.__input = init_object(
            source_cfg['name'], **source_cfg.get('arguments', {}))
    elif source_type == 'tailer':
        if tailer is None:
            raise AttributeError("Missing tailer in config file for tailer type input")
        self.__input = tailer
    assert self.__input

    # The parser section is optional.
    if 'parser' in self.__config:
        parser_cfg = self.__config['parser']
        parser_type = parser_cfg['type']
        if parser_type == 'match':
            self.__parser = MatchParser(
                parser_cfg['pattern'].strip(),
                parser_cfg['transform'].strip())
        elif parser_type == 'split':
            self.__parser = SplitParser(
                parser_cfg['delimiter'].strip(),
                parser_cfg['transform'].strip())
        elif parser_type == 'dummy':
            self.__parser = DummyParser()
        elif parser_type == 'json':
            self.__parser = JsonGrepParser(**parser_cfg.get('arguments', {}))
        elif parser_type == 'class':
            self.__parser = init_object(
                parser_cfg['name'], **parser_cfg.get('arguments', {}))

    # Run-time state; -1 is the default when max_error_count is not configured.
    self.__running = True
    self.__session_id = str(uuid.uuid4())
    self.__max_error_count = source_cfg.get('max_error_count', -1)
    self.__current_data = None
    self.__number_collected = 0
    self.__number_failed = 0
    self.__sleep_count = 0
    self.__error_count = 0
    self.__last_check_minute = -1