def test_cron_event1(self):
    """Check the parsed fields and trigger result for the schedule "0 1 * * *"."""
    cron1 = CronEvent("0 1 * * *")
    # numerical_tab is compared position by position against the five
    # crontab fields: minute, hour, day of month, month, day of week.
    # assertEqual is used so a failure reports both sets.
    self.assertEqual(cron1.numerical_tab[0], set([0]))
    self.assertEqual(cron1.numerical_tab[1], set([1]))
    self.assertEqual(cron1.numerical_tab[2], set(xrange(1, 32)))
    self.assertEqual(cron1.numerical_tab[3], set(xrange(1, 13)))
    self.assertEqual(cron1.numerical_tab[4], set(xrange(0, 7)))
    # The tuple is (year, month, day, hour, minute); 10:00 is not 01:00.
    self.assertFalse(cron1.check_trigger((2015, 6, 16, 10, 0)))
def test_cron_event2(self):
    """Check the parsed fields and trigger result for "0 */5 * * 0-4"."""
    cron2 = CronEvent("0 */5 * * 0-4")
    # numerical_tab is compared position by position against the five
    # crontab fields: minute, hour, day of month, month, day of week.
    # assertEqual is used so a failure reports both sets.
    self.assertEqual(cron2.numerical_tab[0], set([0]))
    self.assertEqual(cron2.numerical_tab[1], set([0, 10, 20, 5, 15]))
    # The day-of-month field is "*" yet an empty set is expected here;
    # presumably because the day-of-week field is restricted -- confirm
    # against CronEvent.
    self.assertEqual(cron2.numerical_tab[2], set([]))
    self.assertEqual(cron2.numerical_tab[3], set(xrange(1, 13)))
    self.assertEqual(cron2.numerical_tab[4], set(xrange(0, 5)))
    # The tuple is (year, month, day, hour, minute).
    self.assertTrue(cron2.check_trigger((2015, 6, 16, 10, 0)))
def test_cron_event1(self):
    """Check the parsed fields and trigger result for the schedule "0 1 * * *"."""
    cron1 = CronEvent("0 1 * * *")
    # numerical_tab is compared position by position against the five
    # crontab fields: minute, hour, day of month, month, day of week.
    # assertEqual is used so a failure reports both sets.
    self.assertEqual(cron1.numerical_tab[0], set([0]))
    self.assertEqual(cron1.numerical_tab[1], set([1]))
    self.assertEqual(cron1.numerical_tab[2], set(xrange(1, 32)))
    self.assertEqual(cron1.numerical_tab[3], set(xrange(1, 13)))
    self.assertEqual(cron1.numerical_tab[4], set(xrange(0, 7)))
    # The tuple is (year, month, day, hour, minute); 10:00 is not 01:00.
    self.assertFalse(cron1.check_trigger((2015, 6, 16, 10, 0)))
def test_cron_event3(self):
    """Check the parsed fields and trigger result for the schedule "*/2 * * * *"."""
    cron3 = CronEvent("*/2 * * * *")
    # numerical_tab is compared position by position against the five
    # crontab fields: minute, hour, day of month, month, day of week.
    # assertEqual is used so a failure reports both sets.
    # "*/2" in the minute field is expected to give every even minute.
    self.assertEqual(cron3.numerical_tab[0], set(xrange(0, 60, 2)))
    self.assertEqual(cron3.numerical_tab[1], set(xrange(0, 24)))
    self.assertEqual(cron3.numerical_tab[2], set(xrange(1, 32)))
    self.assertEqual(cron3.numerical_tab[3], set(xrange(1, 13)))
    self.assertEqual(cron3.numerical_tab[4], set(xrange(0, 7)))
    # The tuple is (year, month, day, hour, minute); minute 40 is even.
    self.assertTrue(cron3.check_trigger((2015, 6, 17, 9, 40)))
def test_cron_event2(self):
    """Check the parsed fields and trigger result for "0 */5 * * 0-4"."""
    cron2 = CronEvent("0 */5 * * 0-4")
    # numerical_tab is compared position by position against the five
    # crontab fields: minute, hour, day of month, month, day of week.
    # assertEqual is used so a failure reports both sets.
    self.assertEqual(cron2.numerical_tab[0], set([0]))
    self.assertEqual(cron2.numerical_tab[1], set([0, 10, 20, 5, 15]))
    # The day-of-month field is "*" yet an empty set is expected here;
    # presumably because the day-of-week field is restricted -- confirm
    # against CronEvent.
    self.assertEqual(cron2.numerical_tab[2], set([]))
    self.assertEqual(cron2.numerical_tab[3], set(xrange(1, 13)))
    self.assertEqual(cron2.numerical_tab[4], set(xrange(0, 5)))
    # The tuple is (year, month, day, hour, minute).
    self.assertTrue(cron2.check_trigger((2015, 6, 16, 10, 0)))
def test_cron_event3(self):
    """Check the parsed fields and trigger result for the schedule "*/2 * * * *"."""
    cron3 = CronEvent("*/2 * * * *")
    # numerical_tab is compared position by position against the five
    # crontab fields: minute, hour, day of month, month, day of week.
    # assertEqual is used so a failure reports both sets.
    # "*/2" in the minute field is expected to give every even minute.
    self.assertEqual(cron3.numerical_tab[0], set(xrange(0, 60, 2)))
    self.assertEqual(cron3.numerical_tab[1], set(xrange(0, 24)))
    self.assertEqual(cron3.numerical_tab[2], set(xrange(1, 32)))
    self.assertEqual(cron3.numerical_tab[3], set(xrange(1, 13)))
    self.assertEqual(cron3.numerical_tab[4], set(xrange(0, 7)))
    # The tuple is (year, month, day, hour, minute); minute 40 is even.
    self.assertTrue(cron3.check_trigger((2015, 6, 17, 9, 40)))
def __init__(self, collector_name, config, output, tailer):
    """Build the collector thread from its configuration.

    collector_name -- used as the thread name.
    config         -- mapping with a mandatory 'input' section and optional
                      'parser' section; both are dispatched on their 'type'.
    output         -- object stored as the destination for payloads.
    tailer         -- object used as the input when the input type is
                      'tailer'.

    An input type that matches none of the known values leaves the input
    set to None; the same holds for an unknown parser type.
    """
    threading.Thread.__init__(self, name=collector_name)
    self.__collector_name = collector_name
    self.__config = config

    source_cfg = self.__config['input']
    self.__sleep_time = source_cfg.get('frequency', 10)
    self.__cron = source_cfg.get('schedule', None)
    self.__schedule = None
    if self.__cron is not None:
        self.__schedule = CronEvent(self.__cron)
        log.debug("job scheduled at %s"%self.__schedule.numerical_tab)

    self.__input = None
    self.__parser = None
    self.__output = output

    # Pick the input implementation from the configured type.
    source_type = source_cfg['type']
    if source_type == 'command':
        self.__input = CommandRunner(source_cfg['source'])
    elif source_type == 'file':
        self.__input = FileReader(source_cfg['path'])
    elif source_type == 'http':
        self.__input = HTTPReader(
            source_cfg['url'],
            source_cfg.get('headers', {}),
            source_cfg.get('auth', None),
        )
    elif source_type == 'class':
        source_kwargs = source_cfg.get('arguments', {})
        self.__input = init_object(source_cfg['name'], **source_kwargs)
    elif source_type == 'tailer':
        self.__input = tailer

    # The parser section is optional; without it raw data is passed through.
    if 'parser' in self.__config:
        parser_cfg = self.__config['parser']
        parser_type = parser_cfg['type']
        if parser_type == 'match':
            self.__parser = MatchParser(
                parser_cfg['pattern'].strip(),
                parser_cfg['transform'].strip(),
            )
        elif parser_type == 'split':
            self.__parser = SplitParser(
                parser_cfg['delimiter'].strip(),
                parser_cfg['transform'].strip(),
            )
        elif parser_type == 'dummy':
            self.__parser = DummyParser()
        elif parser_type == 'json':
            parser_kwargs = parser_cfg['arguments'] if 'arguments' in parser_cfg else {}
            self.__parser = JsonGrepParser(**parser_kwargs)
        elif parser_type == 'class':
            parser_kwargs = parser_cfg['arguments'] if 'arguments' in parser_cfg else {}
            self.__parser = init_object(parser_cfg['name'], **parser_kwargs)

    # Runtime state and counters.
    self.__running = True
    self.__session_id = str(uuid.uuid4())
    self.__max_error_count = source_cfg.get('max_error_count', -1)
    self.__current_data = None
    self.__number_collected = 0
    self.__number_failed = 0
    self.__sleep_count = 0
    self.__error_count = 0
    self.__last_check_minute = -1
def __init__(self, collector_name, config, output, tailer=None):
    """Build the collector thread from its configuration.

    collector_name -- used as the thread name.
    config         -- mapping with a mandatory 'input' section and optional
                      'parser' section; both are dispatched on their 'type'.
    output         -- object stored as the destination for payloads.
    tailer         -- object used as the input when the input type is
                      'tailer'; required in that case.

    Raises AttributeError when the input type is 'tailer' but no tailer was
    given, and AssertionError when no (truthy) input object results from
    the configuration.
    """
    threading.Thread.__init__(self, name=collector_name)
    self.__collector_name = collector_name
    self.__config = config

    source_cfg = self.__config['input']
    self.__sleep_time = source_cfg.get('frequency', 10)
    self.__cron = source_cfg.get('schedule', None)
    self.__schedule = None
    if self.__cron is not None:
        self.__schedule = CronEvent(self.__cron)
        log.debug("job scheduled at %s"%self.__schedule.numerical_tab)

    self.__input = None
    self.__parser = None
    self.__output = output

    # Pick the input implementation from the configured type.
    source_type = source_cfg['type']
    if source_type == 'command':
        self.__input = CommandRunner(source_cfg['source'])
    elif source_type == 'file':
        self.__input = FileReader(source_cfg['path'])
    elif source_type == 'http':
        self.__input = HTTPReader(
            source_cfg['url'],
            source_cfg.get('headers', {}),
            source_cfg.get('auth', None),
        )
    elif source_type == 'class':
        source_kwargs = source_cfg.get('arguments', {})
        self.__input = init_object(source_cfg['name'], **source_kwargs)
    elif source_type == 'tailer':
        if tailer is None:
            raise AttributeError("Missing tailer in config file for tailer type input")
        self.__input = tailer
    assert self.__input

    # The parser section is optional; without it raw data is passed through.
    if 'parser' in self.__config:
        parser_cfg = self.__config['parser']
        parser_type = parser_cfg['type']
        if parser_type == 'match':
            self.__parser = MatchParser(
                parser_cfg['pattern'].strip(),
                parser_cfg['transform'].strip(),
            )
        elif parser_type == 'split':
            self.__parser = SplitParser(
                parser_cfg['delimiter'].strip(),
                parser_cfg['transform'].strip(),
            )
        elif parser_type == 'dummy':
            self.__parser = DummyParser()
        elif parser_type == 'json':
            parser_kwargs = parser_cfg['arguments'] if 'arguments' in parser_cfg else {}
            self.__parser = JsonGrepParser(**parser_kwargs)
        elif parser_type == 'class':
            parser_kwargs = parser_cfg['arguments'] if 'arguments' in parser_cfg else {}
            self.__parser = init_object(parser_cfg['name'], **parser_kwargs)

    # Runtime state and counters.
    self.__running = True
    self.__session_id = str(uuid.uuid4())
    self.__max_error_count = source_cfg.get('max_error_count', -1)
    self.__current_data = None
    self.__number_collected = 0
    self.__number_failed = 0
    self.__sleep_count = 0
    self.__error_count = 0
    self.__last_check_minute = -1
class Collector(threading.Thread):
    """Thread that repeatedly fetches data from an input, wraps it in a
    payload (optionally after parsing) and pushes it to an output."""

    def __init__(self, collector_name, config, output, tailer=None):
        """Build the collector thread from its configuration.

        collector_name -- used as the thread name.
        config         -- mapping with a mandatory 'input' section and
                          optional 'parser' / 'metadata' sections.
        output         -- object receiving payloads via push() and close().
        tailer         -- object used as the input when the input type is
                          'tailer'; required in that case.

        Raises AttributeError when the input type is 'tailer' but no tailer
        was given, and AssertionError when no (truthy) input object results
        from the configuration.
        """
        threading.Thread.__init__(self, name=collector_name)
        self.__collector_name = collector_name
        self.__config = config

        source_cfg = self.__config['input']
        self.__sleep_time = source_cfg.get('frequency', 10)
        self.__cron = source_cfg.get('schedule', None)
        self.__schedule = None
        if self.__cron is not None:
            self.__schedule = CronEvent(self.__cron)
            log.debug("job scheduled at %s"%self.__schedule.numerical_tab)

        self.__input = None
        self.__parser = None
        self.__output = output

        # Pick the input implementation from the configured type.
        source_type = source_cfg['type']
        if source_type == 'command':
            self.__input = CommandRunner(source_cfg['source'])
        elif source_type == 'file':
            self.__input = FileReader(source_cfg['path'])
        elif source_type == 'http':
            self.__input = HTTPReader(
                source_cfg['url'],
                source_cfg.get('headers', {}),
                source_cfg.get('auth', None),
            )
        elif source_type == 'class':
            source_kwargs = source_cfg.get('arguments', {})
            self.__input = init_object(source_cfg['name'], **source_kwargs)
        elif source_type == 'tailer':
            if tailer is None:
                raise AttributeError("Missing tailer in config file for tailer type input")
            self.__input = tailer
        assert self.__input

        # The parser section is optional; without it raw data is passed through.
        if 'parser' in self.__config:
            parser_cfg = self.__config['parser']
            parser_type = parser_cfg['type']
            if parser_type == 'match':
                self.__parser = MatchParser(
                    parser_cfg['pattern'].strip(),
                    parser_cfg['transform'].strip(),
                )
            elif parser_type == 'split':
                self.__parser = SplitParser(
                    parser_cfg['delimiter'].strip(),
                    parser_cfg['transform'].strip(),
                )
            elif parser_type == 'dummy':
                self.__parser = DummyParser()
            elif parser_type == 'json':
                parser_kwargs = parser_cfg['arguments'] if 'arguments' in parser_cfg else {}
                self.__parser = JsonGrepParser(**parser_kwargs)
            elif parser_type == 'class':
                parser_kwargs = parser_cfg['arguments'] if 'arguments' in parser_cfg else {}
                self.__parser = init_object(parser_cfg['name'], **parser_kwargs)

        # Runtime state and counters.
        self.__running = True
        self.__session_id = str(uuid.uuid4())
        self.__max_error_count = source_cfg.get('max_error_count', -1)
        self.__current_data = None
        self.__number_collected = 0
        self.__number_failed = 0
        self.__sleep_count = 0
        self.__error_count = 0
        self.__last_check_minute = -1

    def quit(self):
        """Clear the running flag so the loop in run() exits at its next check."""
        self.__running = False

    def info(self):
        """Return a dict snapshot of the collector's configuration and counters.

        'cron' is included only when a schedule is configured, and 'tailer'
        only when the input type is 'tailer'.
        """
        col_info = {
            "name": self.__collector_name,
            "config": self.__config,
            "sleep_time": self.__sleep_time,
            "session_id": self.__session_id,
            "is_running": self.__running,
            "current_data": self.__current_data,
            "number_collected": self.__number_collected,
            "number_failed": self.__number_failed,
            "sleep_count": self.__sleep_count,
            "error_count": self.__error_count,
            "max_error_count": self.__max_error_count,
        }
        if self.__cron is not None:
            col_info["cron"] = self.__cron
        source_cfg = self.__config['input']
        if source_cfg['type'] == 'tailer':
            col_info["tailer"] = self.__input.info(source_cfg['path'])
        return col_info

    def match_time(self):
        """Tell whether the cron schedule fires for the current minute.

        Returns False when no schedule is configured, or when the current
        minute was already checked by a previous call.
        """
        if self.__schedule is None:
            return False
        now = datetime.datetime.now()
        minute = now.minute
        if minute == self.__last_check_minute:
            # Already evaluated this minute; do not trigger twice.
            return False
        self.__last_check_minute = minute
        log.debug("check if cron job can be triggered. %d"%self.__last_check_minute)
        return self.__schedule.check_trigger((now.year, now.month, now.day, now.hour, minute))

    def run(self):
        """Collect, package and push data until stopped.

        Without a schedule, a collection happens whenever the tick counter
        equals the configured frequency; with a schedule, whenever
        match_time() is true. Otherwise the loop sleeps for one second.
        When max_error_count is positive, that many consecutive failures
        stop the loop. The output is closed on exit.
        """
        ticks = self.__sleep_time
        failures = 0
        log.info("Collector %s has started.", self.__collector_name)
        while self.__running:
            args = {'config': self.__config['input']}
            is_tailer = self.__config['input']['type'] == 'tailer'
            if (self.__schedule is None and ticks == self.__sleep_time) or self.match_time():
                log.debug("Starting to collect data.")
                ticks = 0
                data = None
                batch_size = 1
                try:
                    data = self.__input.get_data(**args)
                    if isinstance(data, (collections.deque, list)):
                        # A batch of lines: one payload per line.
                        self.__current_data = [item.decode('ASCII', 'ignore') for item in data]
                        batch = []
                        batch_size = len(data)
                        for item in data:
                            log.debug("Raw data: %s", item)
                            batch.append(self.generate_payload(str(item.decode('ASCII', 'ignore'))))
                        if not batch:
                            continue
                        self.__output.push(batch)
                    else:
                        # A single block: a string to be parsed, or e.g. a dict.
                        self.__current_data = data
                        log.debug("Raw data: %s", data)
                        if isinstance(data, str):
                            block = self.generate_payload(str(data.decode('ASCII', 'ignore')))
                        else:
                            block = self.generate_payload(data)
                        self.__output.push(block)
                except:
                    self.__current_data = data
                    log.exception('Unable to get or parse data. data: %s', data)
                    failures += 1
                    if self.__max_error_count > 0 and failures >= self.__max_error_count:
                        # Too many consecutive failures: stop the collector.
                        self.__running = False
                        self.__error_count = failures
                        break
                    self.__number_failed += batch_size
                    if is_tailer:
                        self.__input.fail(**args)
                else:
                    failures = 0
                    self.__number_collected += batch_size
                    if is_tailer:
                        self.__input.success(**args)
                self.__error_count = failures
            else:
                time.sleep(1)
                if self.__schedule is None:
                    ticks += 1
            self.__sleep_count = ticks
        self.__output.close()
        log.info("Collector %s has stopped.", self.__collector_name)

    def generate_payload(self, data):
        """Run data through the parser (if any) and wrap it in a payload dict.

        The payload carries a fresh uuid under 'id', the collector's session
        id under 'session', the data under 'data', plus every entry of the
        optional 'metadata' config section.
        """
        parser = self.__parser
        if parser:
            data = parser.parse(data)
            log.debug("Parser %s parsed data %s: ", parser.__class__.__name__, data)
        payload = {"id": str(uuid.uuid4()), "session": self.__session_id}
        payload['data'] = data
        if 'metadata' in self.__config:
            extra = self.__config['metadata']
            for key in extra:
                payload[key] = extra[key]
        log.debug("payload to push: %s", payload)
        return payload
class Collector(threading.Thread):
    """Thread that repeatedly fetches data from an input, wraps it in a
    payload (optionally after parsing) and pushes it to an output."""

    def __init__(self, collector_name, config, output, tailer=None):
        """Build the collector thread from its configuration.

        collector_name -- used as the thread name.
        config         -- mapping with a mandatory 'input' section and
                          optional 'parser' / 'metadata' sections.
        output         -- object receiving payloads via push() and close().
        tailer         -- object used as the input when the input type is
                          'tailer'; required in that case.

        Raises AttributeError when the input type is 'tailer' but no tailer
        was given, and AssertionError when no (truthy) input object results
        from the configuration (e.g. an unknown input type).
        """
        threading.Thread.__init__(self, name=collector_name)
        self.__collector_name = collector_name
        self.__config = config
        input_cfg = self.__config['input']
        self.__sleep_time = input_cfg.get('frequency', 10)
        self.__cron = input_cfg.get('schedule', None)
        self.__schedule = None
        if self.__cron is not None:
            self.__schedule = CronEvent(self.__cron)
            log.debug("job scheduled at %s", self.__schedule.numerical_tab)
        self.__input = None
        self.__parser = None
        self.__output = output

        input_type = input_cfg['type']
        if input_type == 'command':
            self.__input = CommandRunner(input_cfg['source'])
        elif input_type == 'file':
            self.__input = FileReader(input_cfg['path'])
        elif input_type == 'http':
            url = input_cfg['url']
            headers = input_cfg.get('headers', {})
            auth = input_cfg.get('auth', None)
            self.__input = HTTPReader(url, headers, auth)
        elif input_type == 'class':
            arguments = input_cfg.get('arguments', {})
            self.__input = init_object(input_cfg['name'], **arguments)
        elif input_type == 'tailer':
            if tailer is None:
                raise AttributeError("Missing tailer in config file for tailer type input")
            self.__input = tailer
        # Raised explicitly (instead of using an assert statement) so the
        # check is not stripped under "python -O"; the exception type is
        # kept for callers that already handle AssertionError.
        if not self.__input:
            raise AssertionError("No input available for input type %r" % (input_type,))

        if 'parser' in self.__config:
            parser_cfg = self.__config['parser']
            parser_type = parser_cfg['type']
            if parser_type == 'match':
                self.__parser = MatchParser(parser_cfg['pattern'].strip(),
                                            parser_cfg['transform'].strip())
            elif parser_type == 'split':
                self.__parser = SplitParser(parser_cfg['delimiter'].strip(),
                                            parser_cfg['transform'].strip())
            elif parser_type == 'dummy':
                self.__parser = DummyParser()
            elif parser_type == 'json':
                arguments = parser_cfg['arguments'] if 'arguments' in parser_cfg else {}
                self.__parser = JsonGrepParser(**arguments)
            elif parser_type == 'class':
                arguments = parser_cfg['arguments'] if 'arguments' in parser_cfg else {}
                self.__parser = init_object(parser_cfg['name'], **arguments)

        self.__running = True
        self.__session_id = str(uuid.uuid4())
        self.__max_error_count = input_cfg.get('max_error_count', -1)
        self.__current_data = None
        self.__number_collected = 0
        self.__number_failed = 0
        self.__sleep_count = 0
        self.__error_count = 0
        self.__last_check_minute = -1

    def quit(self):
        """Clear the running flag so the loop in run() exits at its next check."""
        self.__running = False

    def info(self):
        """Return a dict snapshot of the collector's configuration and counters.

        'cron' is included only when a schedule is configured, and 'tailer'
        only when the input type is 'tailer'.
        """
        col_info = {"name": self.__collector_name, "config": self.__config,
                    "sleep_time": self.__sleep_time}
        col_info["session_id"] = self.__session_id
        col_info["is_running"] = self.__running
        col_info["current_data"] = self.__current_data
        col_info["number_collected"] = self.__number_collected
        col_info["number_failed"] = self.__number_failed
        col_info["sleep_count"] = self.__sleep_count
        col_info["error_count"] = self.__error_count
        col_info["max_error_count"] = self.__max_error_count
        if self.__cron is not None:
            col_info["cron"] = self.__cron
        if self.__config['input']['type'] == 'tailer':
            col_info["tailer"] = self.__input.info(self.__config['input']['path'])
        return col_info

    def match_time(self):
        """Return True if the cron schedule fires for the current minute.

        Returns False when no schedule is configured, or when the current
        minute was already checked by a previous call.
        """
        if self.__schedule is None:
            return False
        t = datetime.datetime.now()
        if t.minute == self.__last_check_minute:
            return False
        self.__last_check_minute = t.minute
        log.debug("check if cron job can be triggered. %d", self.__last_check_minute)
        return self.__schedule.check_trigger((t.year, t.month, t.day, t.hour, t.minute))

    def run(self):
        """Collect, package and push data until stopped.

        Without a schedule, a collection happens whenever the tick counter
        equals the configured frequency; with a schedule, whenever
        match_time() is true. Otherwise the loop sleeps for one second.
        When max_error_count is positive, that many consecutive failures
        stop the loop. The output is closed on exit.
        """
        count = self.__sleep_time
        error_count = 0
        log.info("Collector %s has started.", self.__collector_name)
        while self.__running:
            args = {'config': self.__config['input']}
            if (self.__schedule is None and count == self.__sleep_time) or self.match_time():
                log.debug("Starting to collect data.")
                count = 0
                data = None
                no_msgs = 1
                try:
                    data = self.__input.get_data(**args)
                    if isinstance(data, (collections.deque, list)):
                        # A batch of lines: decode each line once and build
                        # one payload per line.
                        decoded = [line.decode('ASCII', 'ignore') for line in data]
                        self.__current_data = decoded
                        payload = []
                        no_msgs = len(data)
                        for line, text in zip(data, decoded):
                            log.debug("raw data %s", line)
                            payload.append(self.generate_payload(str(text)))
                        if len(payload) > 0:
                            self.__output.push(payload)
                        else:
                            continue
                    else:
                        # A single block of data: either a byte string to be
                        # decoded and parsed, or an already structured value
                        # (e.g. a dict) that must not be decoded.
                        self.__current_data = data
                        log.debug("Raw data %s", data)
                        if isinstance(data, bytes):
                            payload = self.generate_payload(str(data.decode('ASCII', 'ignore')))
                        else:
                            payload = self.generate_payload(data)
                        self.__output.push(payload)
                except:  # deliberately broad: one bad cycle must not kill the thread
                    self.__current_data = data
                    # Lazy logging arguments: formatting cannot raise inside
                    # this handler, whatever the type of data.
                    log.exception('Unable to get or parse data. data: %s', data)
                    error_count += 1
                    if self.__max_error_count > 0 and error_count >= self.__max_error_count:
                        self.__running = False
                        # Record the count here because break skips the
                        # assignment after the try statement.
                        self.__error_count = error_count
                        break
                    self.__number_failed += no_msgs
                    if self.__config['input']['type'] == 'tailer':
                        self.__input.fail(**args)
                else:
                    error_count = 0
                    self.__number_collected += no_msgs
                    if self.__config['input']['type'] == 'tailer':
                        self.__input.success(**args)
                # Publish the consecutive-failure count for info().
                self.__error_count = error_count
            else:
                time.sleep(1)
                if self.__schedule is None:
                    count += 1
            self.__sleep_count = count
        self.__output.close()
        log.info("Collector %s has stopped.", self.__collector_name)

    def generate_payload(self, data):
        """Parse raw data and package the result in required format.

        The payload carries a fresh uuid under 'id', the collector's session
        id under 'session', the (parsed) data under 'data', plus every entry
        of the optional 'metadata' config section.
        """
        if self.__parser:
            data = self.__parser.parse(data)
            log.debug("parsed data %s", data)
        payload = {"id": str(uuid.uuid4()), "session": self.__session_id}
        payload['data'] = data
        if 'metadata' in self.__config:
            for m in self.__config['metadata']:
                payload[m] = self.__config['metadata'][m]
        log.debug("payload to push: %s", payload)
        return payload