Пример #1
0
    def parse_master_args(self, master_arg):
        """
        Build the master settings from ``self.args.master``.

        The option is a colon-separated list of ``name=value`` pairs; only
        ``host``, ``port`` and ``topic_name`` are accepted, in any order.
        ``master_arg`` is accepted but not read.

        Returns None when ``self.args.no_master`` is set, otherwise an
        AttrDict holding the parsed values.  Raises Exception when a pair
        uses any other name.
        """
        # Collector may run without a master: nothing to parse then
        if self.args.no_master:
            return None

        known_fields = ("host", "port", "topic_name")
        master = AttrDict()
        try:
            for pair in self.args.master.split(":"):
                pieces = pair.split("=")
                field = pieces[0]
                if field not in known_fields:
                    raise ValueError
                # the value is whatever follows the last "="
                setattr(master, field, pieces[-1])
        except ValueError:
            raise Exception("Invalid argument arg: {}".format(
                self.args.master))
        return master
Пример #2
0
 def _init_logaggfs_paths(self, logaggfs_dir):
     '''
     Describe the logaggfs locations rooted at `logaggfs_dir`.

     Returns an AttrDict with `logcache` (the directory exactly as given)
     plus the absolute paths `logs_dir` (<dir>/logs) and `trackfiles`
     (<dir>/trackfiles.txt).
     '''
     paths = AttrDict()
     paths.logcache = logaggfs_dir
     for attr, leaf in (('logs_dir', 'logs'), ('trackfiles', 'trackfiles.txt')):
         setattr(paths, attr, os.path.abspath(os.path.join(logaggfs_dir, leaf)))
     return paths
Пример #3
0
    def start(self):
        """
        Start the worker threads and block until the heartbeat thread ends.

        Three threads are started through ``util.start_daemon_thread``, each
        with its own state object: ``_scan_fpatterns``, ``send_to_nsq`` and
        ``send_heartbeat``.
        """
        state = AttrDict(files_tracked=list())
        util.start_daemon_thread(self._scan_fpatterns, (state, ))

        state = AttrDict(last_push_ts=time.time())
        util.start_daemon_thread(self.send_to_nsq, (state, ))

        state = AttrDict(heartbeat_number=0)
        th_heartbeat = util.start_daemon_thread(self.send_heartbeat, (state, ))

        # Poll with a one-second join rather than one unbounded join.
        # is_alive() replaces isAlive(), which was removed in Python 3.9.
        while th_heartbeat.is_alive():
            th_heartbeat.join(1)
Пример #4
0
    def _parse_auth_args(self):
        """
        Parse ``self.args.auth`` into an AttrDict of credentials.

        The option is a colon-separated list of ``name=value`` pairs.  When it
        is empty or unset the result has ``key`` and ``secret`` set to None.

        Raises ValueError when a pair contains no "=".
        """
        if not self.args.auth:
            return AttrDict({'key': None, 'secret': None})

        auth_dict = dict()
        for pair in self.args.auth.split(':'):
            # Split on the first "=" only, so a value may itself contain "="
            # (e.g. a base64-padded secret) instead of breaking the unpacking.
            name, separator, value = pair.partition('=')
            if not separator:
                raise ValueError(
                    'Invalid auth argument: {!r}'.format(pair))
            auth_dict[name] = value

        return AttrDict(auth_dict)
Пример #5
0
    def collect(self):
        """
        Start the collector's three background threads.

        Each thread is created through ``util.start_daemon_thread`` and gets
        its own AttrDict as state.
        """
        start_thread = util.start_daemon_thread

        # track files and put formatted log lines into the queue
        start_thread(self._scan_fpatterns, (AttrDict(files_tracked=list()),))

        # take formatted logs from the queue and send them to nsq
        start_thread(self._send_to_nsq, (AttrDict(last_push_ts=time.time()),))

        # send heartbeats to the heartbeat topic
        heartbeat_state = AttrDict(heartbeat_number=0)
        self.log.info('init_heartbeat')
        start_thread(self._send_heartbeat, (heartbeat_state,))
Пример #6
0
    def get_history(self, before, after=GMAIL_CREATED_TS):
        """
        Get all the msgs from the user's mailbox within the given dates and
        store them in the db.

        Every page of results is handed to ``self.store_msgs_in_db`` as it
        arrives; the messages of all pages are also accumulated and returned.

        Note : Gmail api will consider 'before' : excluded date, 'after' : included date

        :ref : https://developers.google.com/gmail/api/guides/filtering
        :calls : GET https://www.googleapis.com/gmail/v1/users/userId/messages

        :param before : string
        :param after : string
        :rtype : list

        >>> from mock import Mock
        >>> obj = GmailHistory()
        >>> obj.gmail = Mock()
        >>> api_doc = {'messages':[{'id':'163861dac0f17c61'},{'id':'1632163b6a84ab94'}]}
        >>> obj.gmail.users().messages().list().execute = Mock(obj.gmail.users().messages().list().execute, return_value=api_doc)
        >>> obj.store_msgs_in_db = Mock()
        >>> obj.get_history('2017/05/10')
        [{'id': '163861dac0f17c61'}, {'id': '1632163b6a84ab94'}]

        """
        self.log.debug("fun get history")

        query = "{} before:{} after:{}".format(self.query, before, after)
        response = (self.gmail.users().messages().list(
            userId="me", maxResults=self.MAX_RESULTS, q=query).execute())
        msgs = []
        response = AttrDict(response)

        if "messages" in response:
            msgs.extend(response.messages)
            self.store_msgs_in_db(response.messages)

        # Keep following the continuation token until the last page.
        while "nextPageToken" in response:
            page_token = response.nextPageToken
            response = (self.gmail.users().messages().list(
                userId="me",
                maxResults=self.MAX_RESULTS,
                q=query,
                pageToken=page_token,
            ).execute())
            response = AttrDict(response)

            # Test for the key itself, as for the first page.  The previous
            # `resultSizeEstimate is not 0` compared int identity and still
            # failed when a page carried an estimate but no "messages".
            if "messages" in response:
                msgs.extend(response.messages)
                self.store_msgs_in_db(response.messages)

        return msgs
Пример #7
0
    def _parse_master_args(self):

        if self.args.no_master:
            return None

        master = AttrDict()
        try:
            m = self.args.master.split(":")
            for a in m:
                a = a.split("=")
                if a[0] == "host":
                    master.host = a[-1]
                elif a[0] == "port":
                    master.port = a[-1]
                elif a[0] == "key":
                    master.key = a[-1]
                elif a[0] == "secret":
                    master.secret = a[-1]
                else:
                    raise ValueError
        except ValueError:
            raise Exception("Invalid Argument", self.args.master)
        if not master.keys() == ["host", "port", "key", "secret"]:
            raise Exception("Invalid Argument", self.args.master)
        return master
Пример #8
0
 def ensure_master(self):
     '''
     Return the locally stored master details as an AttrDict.

     When `self.state['master']` is empty an error is printed through
     prRed and the process exits with status 1.
     '''
     master_details = self.state['master']
     if master_details:
         return AttrDict(master_details)

     prRed('No master details stored locally')
     sys.exit(1)
Пример #9
0
    def _init_logaggfs_paths(self, logaggfs_dir):
        """
        Work out the logaggfs locations rooted at ``logaggfs_dir``.

        The result carries ``logcache`` (the directory as given) and the
        absolute paths ``logs_dir`` and ``trackfiles``.

        >>> test_dir = utils.ensure_dir('/tmp/xyz')
        >>> trackfile = open(test_dir+'/trackfiles.txt', 'w+'); trackfile.close()
        >>> lc = collector = LogCollector('localhost', '1088', None, test_dir, test_dir)

        >>> lc._init_logaggfs_paths(test_dir)
        AttrDict({'logcache': '/tmp/xyz', 'logs_dir': '/tmp/xyz/logs', 'trackfiles': '/tmp/xyz/trackfiles.txt'})

        >>> import shutil; shutil.rmtree(test_dir)
        """
        logs_path = os.path.join(logaggfs_dir, "logs")
        trackfiles_path = os.path.join(logaggfs_dir, "trackfiles.txt")

        locations = AttrDict()
        locations.logcache = logaggfs_dir
        locations.logs_dir = os.path.abspath(logs_path)
        locations.trackfiles = os.path.abspath(trackfiles_path)
        return locations
Пример #10
0
    def collect(self):
        """
        Parse the master option, build the collector and serve its API.

        ``self.args.master`` is a colon-separated list of ``name=value`` pairs
        limited to host, port, key and secret; any other name raises
        InvalidArgument.  The collector service is registered as API version
        'v1', a tornado application is bound to ``self.args.port`` and the
        current IOLoop is started.
        """
        master_fields = ('host', 'port', 'key', 'secret')
        master = AttrDict()
        try:
            # a lookup by name, so the order of the pairs is not a factor
            for pair in self.args.master.split(':'):
                pieces = pair.split('=')
                if pieces[0] not in master_fields:
                    raise ValueError
                setattr(master, pieces[0], pieces[-1])
        except ValueError:
            raise InvalidArgument(self.args.master)

        # Collector object and the service wrapping it
        collector = LogCollector(self.args.data_dir, self.args.logaggfs_dir,
                                 master, self.log)
        service = CollectorService(collector, self.log)

        api = API()
        api.register(service, 'v1')

        routes = [(r'^/logagg/.*', RequestHandler, dict(api=api))]
        app = tornado.web.Application(routes)

        app.listen(self.args.port)
        tornado.ioloop.IOLoop.current().start()
Пример #11
0
    def get_channelname(self, _id):
        """
        Resolve a Slack conversation id to a name, caching the result.

        Ids starting with "C" are looked up with ``channels.info`` and ids
        starting with "G" with ``groups.info``.  Ids starting with "D" are
        matched against the ``im.list`` result and resolved to a user name via
        ``self.h.get_username``.  A resolved name is stored in
        ``self.channel_name_cache`` under the id, so later calls for the same
        id skip the API.  (Assumes ``channel_name_cache`` is a mutable
        mapping - confirm against the class initialiser.)

        Returns the name, or None when the id cannot be resolved.

        >>> from mock import Mock
        >>> def side_effect(value, channel='qwer'):
        ...     if value == 'channels.info':
        ...         return {'channel': {'name': 'asdf'}}
        ...     if value == 'groups.info':
        ...         return {"group": {'name': 'abcd'}}
        ...     if value == 'im.list':
        ...         return {'ims': [{'id': 'D1234', 'user': '******'}, {'id': 'D6521', 'user': '******'}]}
        >>> ob = SlackDump()
        >>> ob.slack.api_call = Mock(side_effect=side_effect)
        >>> ob.h.get_username = Mock(ob.h.get_username, return_value='just')
        >>> ob.get_channelname('G1234')
        'abcd'
        >>> ob.get_channelname('C1234')
        'asdf'
        >>> ob.get_channelname('D1234')
        'just'
        """
        # Key the cache by the actual id; the earlier code read and wrote the
        # literal attribute "id" on a copy, so nothing was ever cached.
        cache = self.channel_name_cache
        if _id in cache:
            return cache[_id]

        name = None
        if _id.startswith("C"):
            info = self.slack.api_call("channels.info", channel=_id)
            name = info["channel"]["name"]

        elif _id.startswith("G"):
            info = self.slack.api_call("groups.info", channel=_id)
            name = info["group"]["name"]

        elif _id.startswith("D"):
            for im in self.slack.api_call("im.list")["ims"]:
                if im["id"] == _id:
                    name = self.h.get_username(im["user"])
                    break

        # Only remember successful look-ups so a miss can be retried later.
        if name is not None:
            cache[_id] = name
        return name
    def parse_master_args(self, master_arg):
        '''
        Turn the master option string into an AttrDict of settings

        ``self.args.master`` is a colon-separated list of ``name=value``
        pairs; the accepted names are host, port, cluster_name and
        cluster_passwd, in any order. ``master_arg`` is accepted but not read.

        Returns None when ``self.args.no_master`` is set. Raises Exception
        when a pair uses any other name.
        '''
        # Collector running without master: nothing to parse
        if self.args.no_master:
            return None

        accepted = ('host', 'port', 'cluster_name', 'cluster_passwd')
        master = AttrDict()
        try:
            for pair in self.args.master.split(':'):
                pieces = pair.split('=')
                field = pieces[0]
                if field not in accepted:
                    raise ValueError
                setattr(master, field, pieces[-1])
        except ValueError:
            raise Exception('Invalid argument arg: {}'.format(
                self.args.master))
        return master
Пример #13
0
    def get_history(self, slack, _id, _name, end_ts, start_ts=0):
        """
        Page through a conversation's history and write every message out.

        ``slack`` is the API method name passed to ``self.slack.api_call``.
        Each message gets ``channel`` (``_id``) and ``channel_name``
        (``_name``) attached, goes through ``self.parse_dict`` and is written
        with ``self._write_messages``.  Pages are requested between
        ``start_ts`` and ``end_ts`` until the API reports no more.

        Returns the number of messages processed.

        >>> from mock import Mock
        >>> ob = SlackHistory()
        >>> ob.slack.api_call = Mock(ob.slack.api_call)
        >>> ob.slack.api_call.side_effect= [{'messages' :[{'message': 'Dummy <@123>', 'ts': '123.234'}], 'has_more': True}, {'messages' :[{'message': 'Dummy <@123>', 'ts': '122.234'}], 'has_more': False}]
        >>> ob.parse_dict = Mock(ob.parse_dict, return_value={'message': 'Dummy @asdf', 'ts': '123.234'})
        >>> ob._write_messages = Mock()
        >>> ob.get_history('users.info', '1234', 'general', 12345)
        2
        """
        ts = end_ts
        num = 0
        while True:
            response = self.slack.api_call(
                slack, channel=_id, latest=ts, oldest=start_ts, count=1000
            )
            if "messages" not in response:
                return num

            # Oldest first, so messages are written in chronological order.
            messages = sorted(response["messages"], key=itemgetter("ts"))
            for message in messages:
                msg = AttrDict(message)
                msg.channel = _id
                msg.channel_name = _name

                msg = self.parse_dict(msg)

                self._write_messages(dict(msg))

            num += len(messages)

            # Stop on the last page; an empty page cannot advance the cursor.
            if not response.get("has_more") or not messages:
                return num

            # `latest` is the upper bound of the next page, so it must move
            # back to the OLDEST timestamp seen; using the newest one would
            # request the same window again.
            ts = messages[0]["ts"]
Пример #14
0
    def _update_topic_components(self, topic_name):
        '''
        Read heartbeats of a topic from the NSQ API and upsert them as components

        The topic's heartbeat topic and NSQ addresses are looked up in
        ``self.topic_collection`` and formatted into ``self.NSQ_API_URL``.
        The response is streamed line by line; each line is decoded as a JSON
        heartbeat and upserted into ``self.component_collection`` keyed by its
        host and port. A connection error is logged and otherwise ignored.
        Sleeps ``self.UPDATE_COMPONENTS_INTERVAL`` before returning.
        '''
        topic_info = self.topic_collection.find_one({'topic_name': topic_name})
        topic = topic_info['heartbeat_topic']
        nsqd_tcp_address = topic_info['nsqd_tcp_address']
        nsqd_http_address = topic_info['nsqd_http_address']
        nsq_api_address = topic_info['nsq_api_address']

        url = self.NSQ_API_URL.format(nsq_api_address=nsq_api_address,
                                      nsqd_tcp_address=nsqd_tcp_address,
                                      nsqd_http_address=nsqd_http_address,
                                      topic=topic,
                                      empty_lines='no')
        # Bound before the try so the cleanup below never sees an unset name
        # when requests.get() itself fails or is interrupted.
        resp = None
        try:
            self.log.info("updating_components", topic=topic_name)
            resp = requests.get(url, stream=True)
            for heartbeat in resp.iter_lines():
                heartbeat = AttrDict(json.loads(heartbeat.decode('utf-8')))
                host = heartbeat.host
                port = heartbeat.port
                self.component_collection.update_one(
                    {
                        'host': host,
                        'port': port
                    }, {'$set': heartbeat},
                    upsert=True)

        except requests.exceptions.ConnectionError:
            self.log.warn('cannot_request_nsq_api___will_try_again', url=url)

        except KeyboardInterrupt:
            sys.exit(0)

        finally:
            # Release the streamed connection on every exit path. Compare with
            # None: a Response is falsy for error statuses yet still open.
            if resp is not None:
                resp.close()
        time.sleep(self.UPDATE_COMPONENTS_INTERVAL)
Пример #15
0
    def real_time_message(self):
        """
        Read Slack real-time events forever and persist the parsed ones.

        Only the first element of each read is handled.  An empty read sleeps
        for ``self.EMPTY_READ_SLEEP_INTERVAL``; an element with a single key is
        skipped.  Anything else is parsed with ``self.parse_message`` and
        written through ``self.h._write_messages``.  Never returns.
        """
        self.log.info("completed history")
        while True:
            events = self.slack.rtm_read()

            if events:
                head = events[0]
                if len(head) != 1:
                    event = AttrDict(head)
                    self.log.debug("Message recieved", msg=event)

                    parsed = self.parse_message(event)
                    self.h._write_messages(dict(parsed))
            else:
                time.sleep(self.EMPTY_READ_SLEEP_INTERVAL)
Пример #16
0
    def register_component(self, namespace: str, topic_name: str, host: str,
                           port: str) -> dict:
        '''
        Store the details of a component in the database, creating its topic
        when it does not exist yet
        Sample url:
        'http://localhost:1088/logagg/v1/register_component?namespace=master&topic_name=logagg&host=78.47.113.210&port=1088'

        Returns {'success': True} once the component is upserted. When the
        topic has to be created and that fails, the result of
        ``self._create_topic`` is returned unchanged and nothing is stored.
        '''

        # find the topic or create one
        topic = self.master.topic_collection.find_one(
            {'topic_name': topic_name})
        if not topic:
            create_topic_result = self._create_topic(topic_name)
            if not create_topic_result.get('success'):
                return create_topic_result

        component = {
            'namespace': namespace,
            'host': host,
            'port': str(port),
            'topic_name': topic_name
        }

        try:
            # The same four fields identify the document and are the stored
            # values; the filter is a copy so the two dicts stay independent.
            self.master.component_collection.update_one(
                dict(component), {'$set': component}, upsert=True)
            return {'success': True}
        except pymongo.errors.DuplicateKeyError:
            return {
                'success': True,
                'details': 'Duplicate component details'
            }
Пример #17
0
    def _resolve_call_info(self, request):
        """
        Work out which API function a request targets and gather its arguments.

        The first path segment is dropped.  The next one is treated as the
        version when ``self.api.isversion`` accepts it (otherwise the default
        version is used), the last one is the function name and whatever lies
        between forms the namespace.  Arguments come from the query string
        and, for POST requests, from the body decoded by the request protocol.

        Sets ``request.log``, ``request.fn_name``, ``request.fn`` and
        ``request.fn_params``.

        Returns an AttrDict with ``namespace``, ``function``, ``method`` and
        ``time_deserialize`` (seconds spent decoding the body; 0.0 if none).
        Raises UnknownAPIFunction when the function is not registered.
        """
        r = AttrDict()
        r.time_deserialize = 0.0
        r.namespace = None
        r.function = None
        r.method = "GET"

        urlp = urlparse(request.url)
        path_parts = urlp.path.lstrip("/").split("/")
        path_parts = path_parts[1:]  # ignore "/api/" part

        version = path_parts[0]
        fn_name = path_parts[-1]
        path_parts = path_parts[:-1]
        r.function = fn_name

        if self.api.isversion(version):
            namespace = "/".join(path_parts[1:])
        else:
            version = self.api.get_default_version()
            namespace = "/".join(path_parts)

        # The API lookups take None for "no namespace"; the result and the
        # log binding carry an empty string instead.
        namespace = namespace or None
        r.namespace = namespace or ""

        request.log = self.log.bind(
            __requestid=request.id,
            namespace=r.namespace,
            function=fn_name,
            apiid=self.api._id,
        )

        query_string = urlp.query

        request.fn_name = fn_name
        if not self.api.has_api_fn(fn_name, version, namespace):
            raise UnknownAPIFunction(fn_name)

        fninfo = self.api.get_api_fn(fn_name, version, namespace)
        request.fn = fninfo["obj"]
        info = fninfo["info"]
        params = info["params"]

        # parse function arguments from the request (first value of each key)
        param_vals = dict((k, v[0]) for k, v in parse_qs(query_string).items())

        for key, val in list(param_vals.items()):
            try:
                if params.get(key, {}).get("type", "") == type(val):
                    continue
                param_vals[key] = ast.literal_eval(val)
            except Exception:
                # Not a Python literal: keep the raw string.  Exception rather
                # than a bare except, so interrupts are not swallowed.
                param_vals[key] = val

        if request.method == "POST":
            r.method = "POST"
            protocol = self._find_request_protocol(request)

            # Name of the first parameter declared as a generator, if any.
            stream_param = None
            for name in params:
                try:
                    if params[name]["type"] == typing.Generator:
                        stream_param = name
                        break
                except AttributeError:
                    continue

            # FIXME: request.body: what type is it supposed to be? byte string or file like?
            if not stream_param:
                # Decode the body once and reuse the result.
                t = time.time()
                body_params = protocol.deserialize(request.body)
                r.time_deserialize = time.time() - t
                # Older attribute name, kept for any existing readers.
                r.deserialize_time = r.time_deserialize

                param_vals.update(body_params)
            else:
                param_vals[stream_param] = protocol.deserialize_stream(
                    request.body)

        if info.get("req", None):
            param_vals["req"] = request

        request.fn_params = param_vals

        return r
Пример #18
0
    def run(self):
        """
        Parse the auth and mongodb options, build the master and serve its API.

        ``self.args.auth`` accepts ``key`` and ``secret``; ``self.args.mongodb``
        accepts ``host``, ``port``, ``user``, ``passwd`` and ``db`` (stored as
        ``name``).  Both are colon-separated ``name=value`` lists and any other
        name raises InvalidArgument.  The master service is registered as API
        version 'v1', a tornado application is bound to ``self.args.port`` and
        the current IOLoop is started.
        """
        port = self.args.port
        host = self.args.host

        auth_fields = ('key', 'secret')
        auth = AttrDict()
        try:
            # a lookup by name, so the order of the pairs is not a factor
            for pair in self.args.auth.split(':'):
                pieces = pair.split('=')
                if pieces[0] not in auth_fields:
                    raise ValueError
                setattr(auth, pieces[0], pieces[-1])
        except ValueError:
            raise InvalidArgument(self.args.auth)

        # option name -> attribute it is stored under
        mongodb_attrs = {
            'host': 'host',
            'port': 'port',
            'user': 'user',
            'passwd': 'passwd',
            'db': 'name',
        }
        mongodb = AttrDict()
        try:
            for pair in self.args.mongodb.split(':'):
                pieces = pair.split('=')
                attr = mongodb_attrs.get(pieces[0])
                if attr is None:
                    raise ValueError
                setattr(mongodb, attr, pieces[-1])
        except ValueError:
            raise InvalidArgument(self.args.mongodb)

        # Master object and the service wrapping it
        master = Master(host, port, mongodb, auth, self.log)
        service = MasterService(master, self.log)

        api = API()
        api.register(service, 'v1')

        routes = [(r'^/logagg/.*', RequestHandler, dict(api=api))]
        app = tornado.web.Application(routes)

        app.listen(self.args.port)
        tornado.ioloop.IOLoop.current().start()
Пример #19
0
    def _resolve_call_info(self, request):
        """
        Work out which API function a request targets and gather its arguments.

        The first path segment is dropped.  The next one is treated as the
        version when ``self.api.isversion`` accepts it (otherwise the default
        version is used), the last one is the function name and whatever lies
        between forms the namespace.  Arguments come from the query string
        and, for POST requests, from the body decoded by the request protocol.

        Sets ``request.netpath``, ``request.log`` and ``request.apidoc``, and
        for real calls also ``request.fn_name``, ``request.fn`` and
        ``request.fn_params``.

        Returns a 2-tuple whose second item is an AttrDict with ``namespace``,
        ``function``, ``method`` and ``time_deserialize``.  The first item is
        the request, or the output of ``self.api.doc`` when the function name
        is 'apidoc'.  Raises UnknownAPIFunction when the function is not
        registered.
        """
        r = AttrDict()
        r.time_deserialize = 0.0
        r.namespace = None
        r.function = None
        r.method = 'GET'

        urlp = urlparse(request.url)
        path_parts = urlp.path.lstrip('/').split('/')
        path_parts = path_parts[1:] # ignore "/api/" part

        version = path_parts[0]
        fn_name = path_parts[-1]
        path_parts = path_parts[:-1]
        r.function = fn_name

        if self.api.isversion(version):
            namespace = '/'.join(path_parts[1:])
        else:
            version = self.api.get_default_version()
            namespace = '/'.join(path_parts)

        # The API lookups take None for "no namespace"; the result and the
        # netpath carry an empty string instead.
        namespace = namespace or None
        r.namespace = namespace or ''

        request.netpath = '{netpath}=>{id}_{reqid}({namespace}/{function})'.format(
            netpath=request.headers.get(NETPATH_HEADER, ''),
            id=self.api.id,
            reqid=request.id,
            namespace=r.namespace,
            function=fn_name,
        )

        request.log = self.log.bind(__netpath=request.netpath)

        # 'apidoc' is answered with the API documentation, not a call.
        request.apidoc = False
        if fn_name == 'apidoc':
            request.apidoc = True
            return self.api.doc(version, namespace), r

        query_string = urlp.query

        request.fn_name = fn_name
        if not self.api.has_api_fn(fn_name, version, namespace):
            raise UnknownAPIFunction(fn_name)

        fninfo = self.api.get_api_fn(fn_name, version, namespace)
        request.fn = fninfo['obj']
        info = fninfo['info']
        params = info['params']

        # parse function arguments from the request (first value of each key)
        param_vals = dict((k, v[0]) \
            for k, v in parse_qs(query_string).items())

        for key, val in list(param_vals.items()):
            try:
                param_vals[key] = ast.literal_eval(val)
            except Exception:
                # Not a Python literal: keep the raw string.  Exception rather
                # than a bare except, so interrupts are not swallowed.
                param_vals[key] = val

        if request.method == 'POST':
            r.method = 'POST'
            protocol = self._find_request_protocol(request)

            # Name of the first parameter declared as a generator, if any.
            stream_param = None
            for name in params:
                try:
                    if issubclass(params[name]['type'], typing.Generator):
                        stream_param = name
                        break
                except (AttributeError, TypeError):
                    # issubclass() raises TypeError for annotations that are
                    # not classes; such a parameter is not a stream.
                    continue

            # FIXME: request.body: what type is it supposed to be? byte string or file like?
            if not stream_param:
                # Decode the body once and reuse the result.
                t = time.time()
                body_params = protocol.deserialize(request.body)
                r.time_deserialize = time.time() - t
                # Older attribute name, kept for any existing readers.
                r.deserialize_time = r.time_deserialize

                param_vals.update(body_params)
            else:
                param_vals[stream_param] = protocol.deserialize_stream(request.body)

        if info.get('req', None):
            param_vals['req'] = request

        request.fn_params = param_vals

        return request, r