def test_case3(self):
     """A '. chargebackData' pattern over a JSON array yields one entry per element."""
     parser = JsonGrepParser(pattern=". chargebackData", list_name="hcp-chargeback")
     raw_json = '[{"chargebackData": 1}, {"chargebackData": 2}]'
     output = parser.parse(raw_json)
     # The parsed result is expected to carry these bookkeeping fields
     # alongside the extracted list.
     self.assertIn('timestamp', output)
     self.assertIn('hostname', output)
     self.assertIn('hcp-chargeback', output)
     records = output["hcp-chargeback"]
     self.assertIsInstance(records, list)
     # Dedicated assert methods report the actual values on failure,
     # unlike assertTrue(a == b) which only reports "False is not true".
     self.assertEqual(len(records), 2)
     self.assertEqual(records[0], 1)
     self.assertEqual(records[1], 2)
 def test_case2(self):
     """A 'chargebackData' pattern over a list of strings yields those strings in order."""
     parser = JsonGrepParser(pattern="chargebackData", list_name="hcp-chargeback")
     raw_json = '{"chargebackData":["chargebackData1","chargebackData2"]}'
     output = parser.parse(raw_json)
     # The parsed result is expected to carry these bookkeeping fields
     # alongside the extracted list.
     self.assertIn('timestamp', output)
     self.assertIn('hostname', output)
     self.assertIn('hcp-chargeback', output)
     records = output["hcp-chargeback"]
     self.assertIsInstance(records, list)
     # Dedicated assert methods report the actual values on failure,
     # unlike assertTrue(a == b) which only reports "False is not true".
     self.assertEqual(len(records), 2)
     self.assertEqual(records[0], 'chargebackData1')
     self.assertEqual(records[1], 'chargebackData2')
 def test_case1(self):
     """A 'chargebackData' pattern over a list of objects yields those objects in order."""
     parser = JsonGrepParser(pattern="chargebackData", list_name="hcp-chargeback")
     raw_json = '{"chargebackData":[{"systemName":"hcp1.s3.ersa.edu.au"}, {"systemName":"hcp2.s3.ersa.edu.au"}]}'
     output = parser.parse(raw_json)
     # The parsed result is expected to carry these bookkeeping fields
     # alongside the extracted list.
     self.assertIn('timestamp', output)
     self.assertIn('hostname', output)
     self.assertIn('hcp-chargeback', output)
     records = output["hcp-chargeback"]
     self.assertIsInstance(records, list)
     # Dedicated assert methods report the actual values on failure,
     # unlike assertTrue(a == b) which only reports "False is not true".
     self.assertEqual(len(records), 2)
     self.assertIn('systemName', records[0])
     self.assertEqual(records[0]['systemName'], 'hcp1.s3.ersa.edu.au')
     self.assertIn('systemName', records[1])
     self.assertEqual(records[1]['systemName'], 'hcp2.s3.ersa.edu.au')
Exemplo n.º 4
0
class Collector(threading.Thread):
    """Thread that repeatedly fetches data from an input and pushes it out.

    A collection is triggered either after ``frequency`` one-second sleeps
    or, when a ``schedule`` is configured, when the cron event matches the
    current minute.  Fetched data is optionally run through a parser,
    wrapped in a payload and handed to the output.
    """

    def __init__(self, collector_name, config, output, tailer=None):
        """Build the input and optional parser described by ``config``.

        Args:
            collector_name: Name of the collector; also used as thread name.
            config: Mapping with a required 'input' section and optional
                'parser' and 'metadata' sections.
            output: Object exposing ``push(payload)`` and ``close()``.
            tailer: Input object to use when the input type is 'tailer'.

        Raises:
            AttributeError: Input type is 'tailer' but no tailer was given.
            AssertionError: No input could be created from the config.
        """
        threading.Thread.__init__(self, name=collector_name)
        self.__collector_name = collector_name
        self.__config = config

        input_cfg = self.__config['input']
        self.__sleep_time = input_cfg.get('frequency', 10)
        self.__cron = input_cfg.get('schedule', None)
        self.__schedule = None
        if self.__cron is not None:
            self.__schedule = CronEvent(self.__cron)
            log.debug("job scheduled at %s" % self.__schedule.numerical_tab)

        self.__input = None
        self.__parser = None
        self.__output = output

        self.__input = self.__make_input(tailer)
        assert self.__input

        self.__parser = self.__make_parser()

        # Runtime state, exposed through info().
        self.__running = True
        self.__session_id = str(uuid.uuid4())
        self.__max_error_count = self.__config['input'].get('max_error_count', -1)
        self.__current_data = None
        self.__number_collected = 0
        self.__number_failed = 0
        self.__sleep_count = 0
        self.__error_count = 0
        self.__last_check_minute = -1

    @staticmethod
    def __arguments_of(section):
        """Return the 'arguments' entry of a config section, or {} if absent."""
        if 'arguments' in section:
            return section['arguments']
        return {}

    def __make_input(self, tailer):
        """Create the input object selected by the 'input' section's type."""
        input_cfg = self.__config['input']
        kind = input_cfg['type']
        if kind == 'command':
            return CommandRunner(input_cfg['source'])
        if kind == 'file':
            return FileReader(input_cfg['path'])
        if kind == 'http':
            url = input_cfg['url']
            headers = input_cfg.get('headers', {})
            auth = input_cfg.get('auth', None)
            return HTTPReader(url, headers, auth)
        if kind == 'class':
            return init_object(input_cfg['name'], **self.__arguments_of(input_cfg))
        if kind == 'tailer':
            if tailer is None:
                raise AttributeError("Missing tailer in config file for tailer type input")
            return tailer
        # Unknown type: leave the input unset so the caller's assert fires.
        return None

    def __make_parser(self):
        """Create the parser selected by the optional 'parser' section."""
        if 'parser' not in self.__config:
            return None
        parser_cfg = self.__config['parser']
        kind = parser_cfg['type']
        if kind == 'match':
            return MatchParser(parser_cfg['pattern'].strip(),
                               parser_cfg['transform'].strip())
        if kind == 'split':
            return SplitParser(parser_cfg['delimiter'].strip(),
                               parser_cfg['transform'].strip())
        if kind == 'dummy':
            return DummyParser()
        if kind == 'json':
            return JsonGrepParser(**self.__arguments_of(parser_cfg))
        if kind == 'class':
            return init_object(parser_cfg['name'], **self.__arguments_of(parser_cfg))
        return None

    def quit(self):
        """Ask the run loop to stop after its current iteration."""
        self.__running = False

    def info(self):
        """Return a dict describing the collector's config and counters."""
        summary = {
            "name": self.__collector_name,
            "config": self.__config,
            "sleep_time": self.__sleep_time,
            "session_id": self.__session_id,
            "is_running": self.__running,
            "current_data": self.__current_data,
            "number_collected": self.__number_collected,
            "number_failed": self.__number_failed,
            "sleep_count": self.__sleep_count,
            "error_count": self.__error_count,
            "max_error_count": self.__max_error_count,
        }
        if self.__cron is not None:
            summary["cron"] = self.__cron
        input_cfg = self.__config['input']
        if input_cfg['type'] == 'tailer':
            summary["tailer"] = self.__input.info(input_cfg['path'])
        return summary

    def match_time(self):
        """Return True if the schedule triggers for the current minute.

        Each minute is checked at most once; repeated calls within the
        same minute return False.
        """
        if self.__schedule is None:
            return False
        now = datetime.datetime.now()
        if now.minute == self.__last_check_minute:
            return False
        self.__last_check_minute = now.minute
        log.debug("check if cron job can be triggered. %d" % self.__last_check_minute)
        stamp = (now.year, now.month, now.day, now.hour, now.minute)
        return self.__schedule.check_trigger(stamp)

    def run(self):
        """Collect, package and push data until stopped or too many errors."""
        ticks = self.__sleep_time
        error_count = 0
        log.info("Collector %s has started.", self.__collector_name)
        while self.__running:
            args = {'config': self.__config['input']}
            due = (self.__schedule is None and ticks == self.__sleep_time) or self.match_time()
            if not due:
                # Nothing to do yet: wait one second and count it when the
                # collector is frequency-driven.
                time.sleep(1)
                if self.__schedule is None:
                    ticks += 1
                self.__sleep_count = ticks
                continue

            log.debug("Starting to collect data.")
            ticks = 0
            data = None
            no_msgs = 1
            try:
                data = self.__input.get_data(**args)
                if isinstance(data, (collections.deque, list)):
                    self.__current_data = [item.decode('ASCII', 'ignore') for item in data]
                    no_msgs = len(data)
                    batch = []
                    for line in data:
                        log.debug("Raw data: %s", line)
                        text = str(line.decode('ASCII', 'ignore'))
                        batch.append(self.generate_payload(text))
                    if not batch:
                        # Empty batch: skip the bookkeeping below entirely.
                        continue
                    self.__output.push(batch)
                else:
                    # a block of data: either string to be parsed or dict
                    self.__current_data = data
                    log.debug("Raw data: %s", data)
                    if isinstance(data, str):
                        data_for_payload = str(data.decode('ASCII', 'ignore'))
                    else:
                        data_for_payload = data
                    self.__output.push(self.generate_payload(data_for_payload))
            except:
                self.__current_data = data
                log.exception('Unable to get or parse data. data: %s', data)
                error_count += 1
                limit = self.__max_error_count
                if 0 < limit <= error_count:
                    # Too many consecutive failures: stop the collector.
                    self.__running = False
                    self.__error_count = error_count
                    break
                self.__number_failed += no_msgs
                if self.__config['input']['type'] == 'tailer':
                    self.__input.fail(**args)
            else:
                error_count = 0
                self.__number_collected += no_msgs
                if self.__config['input']['type'] == 'tailer':
                    self.__input.success(**args)
            self.__error_count = error_count
            self.__sleep_count = ticks

        self.__output.close()
        log.info("Collector %s has stopped.", self.__collector_name)

    def generate_payload(self, data):
        """Parse raw data if a parser is set and wrap it in a payload dict.

        The payload carries a fresh id, the collector's session id, the
        (parsed) data and every entry of the optional 'metadata' section.
        """
        if self.__parser:
            data = self.__parser.parse(data)
            log.debug("Parser %s parsed data %s: ", self.__parser.__class__.__name__, data)

        packaged = {"id": str(uuid.uuid4()), "session": self.__session_id, "data": data}
        if 'metadata' in self.__config:
            extra = self.__config['metadata']
            for key in extra:
                packaged[key] = extra[key]
        log.debug("payload to push: %s", packaged)
        return packaged
Exemplo n.º 5
0
class Collector(object):
    """Fetch data from an input, optionally parse it, and push it to an output."""

    def __init__(self, configuration):
        """Create the input, optional parser and output for one collector.

        ``configuration`` must contain an 'input' section and may contain
        'parser' and 'metadata' sections.

        Raises:
            AttributeError: Input type is 'tailer' but no tailer is configured.
            AssertionError: No input could be created from the configuration.
        """
        self.config = configuration
        self.input = None
        self.parser = None
        self.metadata = None

        # The input is mandatory.
        input_cfg = self.config['input']
        arguments = input_cfg['arguments'] if 'arguments' in input_cfg else {}
        input_type = input_cfg['type']
        if input_type == 'command':
            self.input = CommandRunner(**arguments)
        elif input_type == 'file':
            self.input = FileReader(**arguments)
        elif input_type == 'http':
            self.input = HTTPReader(**arguments)
        elif input_type == 'class':
            self.input = init_object(input_cfg['name'], **arguments)
        elif input_type == 'tailer':
            # NOTE(review): `config` is neither a parameter nor a local here;
            # presumably a module-level configuration object - confirm.
            if 'tailer' not in config:
                raise AttributeError(
                    "Missing tailer in config file for tailer type input")
            self.input = Tailer(config['tailer'])

        assert self.input

        # The parser is optional; without one, data is packaged as fetched.
        if 'parser' in self.config:
            parser_cfg = self.config['parser']
            arguments = parser_cfg['arguments'] if 'arguments' in parser_cfg else {}
            parser_type = parser_cfg['type']
            if parser_type == 'match':
                self.parser = MatchParser(parser_cfg['pattern'].strip(),
                                          parser_cfg['transform'].strip())
            elif parser_type == 'split':
                self.parser = SplitParser(parser_cfg['delimiter'].strip(),
                                          parser_cfg['transform'].strip())
            elif parser_type == 'dummy':
                self.parser = DummyParser()
            elif parser_type == 'json':
                self.parser = JsonGrepParser(**arguments)
            elif parser_type == 'class':
                self.parser = init_object(parser_cfg['name'], **arguments)

        self._max_error_count = self.config['input'].get('max_error_count', -1)
        self._current_data = None
        self._number_collected = 0
        self._number_failed = 0
        self._error_count = 0

        # NOTE(review): same bare `config` name as above - confirm it is the
        # intended module-level object.
        self._output = create_output(config['output'])

        if 'metadata' in self.config:
            self.metadata = self.config['metadata']

    def collect(self):
        """Run one collection: fetch, package, push, then close the output."""
        error_count = 0
        args = {'config': self.config['input']}
        log.debug("Starting to collect data.")
        data = None
        no_msgs = 1
        is_tailer = self.config['input']['type'] == 'tailer'
        try:
            data = self.input.get_data(**args)
            if isinstance(data, (collections.deque, list)):
                self._current_data = [
                    item.decode('ASCII', 'ignore') for item in data
                ]
                no_msgs = len(data)
                batch = []
                for line in data:
                    log.debug("Raw data: %s", line)
                    text = str(line.decode('ASCII', 'ignore'))
                    batch.append(self.generate_package(text))
                if batch:
                    self._output.push(batch)
            else:
                # A single block of data: a string to be parsed, or a dict.
                self._current_data = data
                log.debug("Raw data: %s", data)
                if isinstance(data, str):
                    block_data = str(data.decode('ASCII', 'ignore'))
                else:
                    block_data = data
                self._output.push(self.generate_package(block_data))
        except:
            self._current_data = data
            log.exception('Unable to get or parse data. data: %s', data)
            error_count += 1
            limit = self._max_error_count
            if 0 < limit <= error_count:
                self._error_count = error_count
            self._number_failed += no_msgs
            if is_tailer:
                self.input.fail(**args)
        else:
            error_count = 0
            self._number_collected += no_msgs
            if is_tailer:
                self.input.success(**args)
        self._error_count = error_count

        self._output.close()

    def generate_package(self, data):
        """Parse ``data`` when a parser is set and build the payload for it."""
        if self.parser:
            data = self.parser.parse(data)
            log.debug("Parser %s parsed data %s: ",
                      self.parser.__class__.__name__, data)

        log.debug("Data to be packaged: %s", data)
        return generate_payload(data, self.metadata)
Exemplo n.º 6
0
class Collector(threading.Thread):
    """Thread that repeatedly fetches data from an input and pushes it out.

    A collection is triggered either after ``frequency`` one-second sleeps
    or, when a ``schedule`` is configured, when the cron event matches the
    current minute.  Fetched data is optionally run through a parser,
    wrapped in a payload and handed to the output.
    """

    def __init__(self, collector_name, config, output, tailer=None):
        """Build the input and optional parser described by ``config``.

        Args:
            collector_name: Name of the collector; also used as thread name.
            config: Mapping with a required 'input' section and optional
                'parser' and 'metadata' sections.
            output: Object exposing ``push(payload)`` and ``close()``.
            tailer: Input object to use when the input type is 'tailer'.

        Raises:
            AttributeError: Input type is 'tailer' but no tailer was given.
            AssertionError: No input could be created from the config.
        """
        threading.Thread.__init__(self, name=collector_name)
        self.__collector_name = collector_name
        self.__config = config
        self.__sleep_time = self.__config['input'].get('frequency', 10)
        self.__cron = self.__config['input'].get('schedule', None)
        self.__schedule = None
        if self.__cron is not None:
            self.__schedule = CronEvent(self.__cron)
            log.debug("job scheduled at %s", self.__schedule.numerical_tab)
        self.__input = None
        self.__parser = None
        self.__output = output

        if self.__config['input']['type'] == 'command':
            self.__input = CommandRunner(self.__config['input']['source'])
        elif self.__config['input']['type'] == 'file':
            self.__input = FileReader(self.__config['input']['path'])
        elif self.__config['input']['type'] == 'http':
            url = self.__config['input']['url']
            headers = self.__config['input'].get('headers', {})
            auth = self.__config['input'].get('auth', None)
            self.__input = HTTPReader(url, headers, auth)
        elif self.__config['input']['type'] == 'class':
            arguments = {}
            if 'arguments' in self.__config['input']:
                arguments = self.__config['input']['arguments']
            self.__input = init_object(self.__config['input']['name'], **arguments)
        elif self.__config['input']['type'] == 'tailer':
            if tailer is None:
                raise AttributeError("Missing tailer in config file for tailer type input")
            self.__input = tailer

        # Diagnostic output goes through the logger, not stdout.
        log.debug("input: %s", self.__input)
        assert(self.__input)

        if 'parser' in self.__config:
            if self.__config['parser']['type'] == 'match':
                self.__parser = MatchParser(self.__config['parser']['pattern'].strip(), self.__config['parser']['transform'].strip())
            elif self.__config['parser']['type'] == 'split':
                self.__parser = SplitParser(self.__config['parser']['delimiter'].strip(), self.__config['parser']['transform'].strip())
            elif self.__config['parser']['type'] == 'dummy':
                self.__parser = DummyParser()
            elif self.__config['parser']['type'] == 'json':
                arguments = {}
                if 'arguments' in self.__config['parser']:
                    arguments = self.__config['parser']['arguments']
                self.__parser = JsonGrepParser(**arguments)
            elif self.__config['parser']['type'] == 'class':
                arguments = {}
                if 'arguments' in self.__config['parser']:
                    arguments = self.__config['parser']['arguments']
                self.__parser = init_object(self.__config['parser']['name'], **arguments)

        # Runtime state, exposed through info().
        self.__running = True
        self.__session_id = str(uuid.uuid4())
        self.__max_error_count = self.__config['input'].get('max_error_count', -1)
        self.__current_data = None
        self.__number_collected = 0
        self.__number_failed = 0
        self.__sleep_count = 0
        self.__error_count = 0
        self.__last_check_minute = -1

    def quit(self):
        """Ask the run loop to stop after its current iteration."""
        self.__running = False

    def info(self):
        """Return a dict describing the collector's config and counters."""
        col_info = {"name": self.__collector_name, "config": self.__config, "sleep_time": self.__sleep_time}
        col_info["session_id"] = self.__session_id
        col_info["is_running"] = self.__running
        col_info["current_data"] = self.__current_data
        col_info["number_collected"] = self.__number_collected
        col_info["number_failed"] = self.__number_failed
        col_info["sleep_count"] = self.__sleep_count
        col_info["error_count"] = self.__error_count
        col_info["max_error_count"] = self.__max_error_count
        if self.__cron is not None:
            col_info["cron"] = self.__cron
        if self.__config['input']['type'] == 'tailer':
            col_info["tailer"] = self.__input.info(self.__config['input']['path'])
        return col_info

    def match_time(self):
        """Return True if the schedule triggers for the current minute.

        Each minute is checked at most once; repeated calls within the
        same minute return False.
        """
        if self.__schedule is None:
            return False
        t = datetime.datetime.now()
        if t.minute == self.__last_check_minute:
            return False
        self.__last_check_minute = t.minute
        log.debug("check if cron job can be triggered. %d", self.__last_check_minute)
        return self.__schedule.check_trigger((t.year, t.month, t.day, t.hour, t.minute))

    def run(self):
        """Collect, package and push data until stopped or too many errors.

        Consecutive failures are counted; once ``max_error_count`` (when
        positive) is reached the collector stops itself.  The output is
        closed when the loop ends.
        """
        count = self.__sleep_time
        error_count = 0
        log.info("Collector %s has started.", self.__collector_name)
        while self.__running:
            args = {'config': self.__config['input']}
            if (self.__schedule is None and count == self.__sleep_time) or self.match_time():
                log.debug("Starting to collect data.")
                count = 0
                data = None
                no_msgs = 1
                try:
                    data = self.__input.get_data(**args)
                    if isinstance(data, collections.deque) or isinstance(data, list):
                        self.__current_data = [l.decode('ASCII', 'ignore') for l in data]
                        payload = []
                        no_msgs = len(data)
                        for line in data:
                            log.debug("raw data %s", line)
                            payload.append(self.generate_payload(str(line.decode('ASCII', 'ignore'))))
                        if len(payload) > 0:
                            self.__output.push(payload)
                        else:
                            # Empty batch: skip the bookkeeping below entirely.
                            continue
                    else:
                        self.__current_data = data
                        log.debug("Raw data %s", data)
                        # NOTE(review): assumes non-list data exposes decode();
                        # anything else raises and is counted as a failure.
                        payload = self.generate_payload(str(data.decode('ASCII', 'ignore')))
                        self.__output.push(payload)
                except:
                    self.__current_data = data
                    log.exception('Unable to get or parse data. data: %s', data)
                    error_count += 1
                    if self.__max_error_count > 0 and error_count >= self.__max_error_count:
                        self.__running = False
                        # Assignment, not comparison: publish the final count
                        # so info() reports why the collector stopped.
                        self.__error_count = error_count
                        break
                    self.__number_failed += no_msgs
                    if self.__config['input']['type'] == 'tailer':
                        self.__input.fail(**args)
                else:
                    error_count = 0
                    self.__number_collected += no_msgs
                    if self.__config['input']['type'] == 'tailer':
                        self.__input.success(**args)
                # Keep the count reported by info() in sync with the loop.
                self.__error_count = error_count
            else:
                time.sleep(1)
                if self.__schedule is None:
                    count += 1

            self.__sleep_count = count

        self.__output.close()
        log.info("Collector %s has stopped.", self.__collector_name)

    def generate_payload(self, data):
        """Parse raw data if a parser is set and wrap it in a payload dict.

        The payload carries a fresh id, the collector's session id, the
        (parsed) data and every entry of the optional 'metadata' section.
        """
        if self.__parser:
            data = self.__parser.parse(data)
        log.debug("parsed data %s", data)
        payload = {"id": str(uuid.uuid4()), "session": self.__session_id}
        payload['data'] = data
        if 'metadata' in self.__config:
            for m in self.__config['metadata']:
                payload[m] = self.__config['metadata'][m]
        log.debug("payload to push: %s", payload)
        return payload