def parse_master_args(self, master_arg):
    """
    Parse the ``host=...:port=...:topic_name=...`` master argument string.

    :param master_arg: raw master argument string. NOTE(review): this
        parameter is currently unused; the value is read from
        ``self.args.master`` — confirm callers always pass the same value.
    :returns: AttrDict with ``host``/``port``/``topic_name`` keys, or
        ``None`` when the collector runs without a master.
    :raises Exception: when the argument string is malformed.
    """
    # Collector running with or without master
    if self.args.no_master:
        return None

    master = AttrDict()
    try:
        # So that order of keys is not a factor
        for part in self.args.master.split(":"):
            key, sep, value = part.partition("=")
            # Reject a part with no '=' (e.g. bare "host"); previously it
            # was silently accepted with the key itself used as the value.
            if not sep:
                raise ValueError
            if key == "host":
                master.host = value
            elif key == "port":
                master.port = value
            elif key == "topic_name":
                master.topic_name = value
            else:
                raise ValueError
    except ValueError as e:
        # Chain the original error for easier debugging.
        raise Exception("Invalid argument arg: {}".format(
            self.args.master)) from e
    return master
def _init_logaggfs_paths(self, logaggfs_dir):
    """Build the AttrDict of logaggfs paths: cache dir, logs dir and trackfiles file."""
    join = os.path.join
    paths = AttrDict()
    paths.logcache = logaggfs_dir
    paths.logs_dir = os.path.abspath(join(logaggfs_dir, 'logs'))
    paths.trackfiles = os.path.abspath(join(logaggfs_dir, 'trackfiles.txt'))
    return paths
def start(self):
    """
    Start the collector's daemon threads (file scanner, NSQ sender,
    heartbeat) and block until the heartbeat thread stops.
    """
    # Track files matching the configured patterns.
    state = AttrDict(files_tracked=list())
    util.start_daemon_thread(self._scan_fpatterns, (state, ))

    # Ship formatted log lines to NSQ.
    state = AttrDict(last_push_ts=time.time())
    util.start_daemon_thread(self.send_to_nsq, (state, ))

    # Periodic heartbeat.
    state = AttrDict(heartbeat_number=0)
    th_heartbeat = util.start_daemon_thread(self.send_heartbeat, (state, ))

    # Wait on the heartbeat thread; exit when it dies.
    while True:
        th_heartbeat.join(1)
        # Thread.isAlive() was removed in Python 3.9; is_alive() is the
        # supported spelling.
        if not th_heartbeat.is_alive():
            break
def _parse_auth_args(self):
    """
    Parse the ``key=...:secret=...`` auth argument into an AttrDict.

    :returns: AttrDict with whatever keys appear in ``self.args.auth``;
        when no auth argument is given, ``{'key': None, 'secret': None}``.
    :raises ValueError: when a part contains no ``=`` at all.
    """
    if not self.args.auth:
        return AttrDict({'key': None, 'secret': None})

    auth_dict = dict()
    for pair in self.args.auth.split(':'):
        # Split on the first '=' only, so secrets that themselves
        # contain '=' (common in base64 values) do not raise ValueError.
        key, value = pair.split('=', 1)
        auth_dict[key] = value
    return AttrDict(auth_dict)
def collect(self):
    """Spawn the collector's worker threads: file scanner, NSQ shipper and heartbeat."""
    # Track files matching the configured patterns and queue formatted lines.
    scanner_state = AttrDict(files_tracked=list())
    util.start_daemon_thread(self._scan_fpatterns, (scanner_state,))

    # Drain the queue and forward formatted logs to nsq.
    sender_state = AttrDict(last_push_ts=time.time())
    util.start_daemon_thread(self._send_to_nsq, (sender_state,))

    # Emit heartbeats on the "Heartbeat" topic.
    heartbeat_state = AttrDict(heartbeat_number=0)
    self.log.info('init_heartbeat')
    th_heartbeat = util.start_daemon_thread(self._send_heartbeat,
                                            (heartbeat_state,))
def get_history(self, before, after=GMAIL_CREATED_TS):
    """
    Get all the msgs from the user's mailbox with in given dates and store in the db

    Note : Gmail api will consider 'before' : excluded date, 'after' : included date
    Eg: before : 2017/01/01, after : 2017/01/31
    then gmail api gives msgs from 2017/01/02 - 2017/01/31

    :ref : https://developers.google.com/gmail/api/guides/filtering
    :calls : GET https://www.googleapis.com/gmail/v1/users/userId/messages
    :param before : string
    :param after : string
    :rtype : list

    >>> from mock import Mock
    >>> obj = GmailHistory()
    >>> obj.gmail = Mock()
    >>> api_doc = {'messages':[{'id':'163861dac0f17c61'},{'id':'1632163b6a84ab94'}]}
    >>> obj.gmail.users().messages().list().execute = Mock(obj.gmail.users().messages().list().execute, return_value=api_doc)
    >>> obj.store_msgs_in_db = Mock()
    >>> obj.get_history('2017/05/10')
    [{'id': '163861dac0f17c61'}, {'id': '1632163b6a84ab94'}]
    """
    self.log.debug("fun get history")
    query = "{} before:{} after:{}".format(self.query, before, after)
    response = (self.gmail.users().messages().list(
        userId="me", maxResults=self.MAX_RESULTS, q=query).execute())
    msgs = []
    response = AttrDict(response)
    if "messages" in response:
        msgs.extend(response.messages)
        self.store_msgs_in_db(response.messages)

    # Follow pagination until the API stops returning a nextPageToken.
    while "nextPageToken" in response:
        page_token = response.nextPageToken
        response = (self.gmail.users().messages().list(
            userId="me",
            maxResults=self.MAX_RESULTS,
            q=query,
            pageToken=page_token,
        ).execute())
        response = AttrDict(response)
        # `is not 0` compared identity, not value (a SyntaxWarning on
        # modern Python); use `!=`. Also guard on "messages" so a page
        # with a non-zero estimate but no messages cannot raise KeyError.
        if response.resultSizeEstimate != 0 and "messages" in response:
            msgs.extend(response.messages)
            self.store_msgs_in_db(response.messages)
    return msgs
def _parse_master_args(self):
    """
    Parse ``host=...:port=...:key=...:secret=...`` into an AttrDict.

    :returns: AttrDict with exactly host/port/key/secret keys, or
        ``None`` when running without a master.
    :raises Exception: on a malformed or incomplete argument string.
    """
    if self.args.no_master:
        return None

    master = AttrDict()
    try:
        for part in self.args.master.split(":"):
            kv = part.split("=")
            if kv[0] == "host":
                master.host = kv[-1]
            elif kv[0] == "port":
                master.port = kv[-1]
            elif kv[0] == "key":
                master.key = kv[-1]
            elif kv[0] == "secret":
                master.secret = kv[-1]
            else:
                raise ValueError
    except ValueError:
        raise Exception("Invalid Argument", self.args.master)

    # dict.keys() returns a view in Python 3 and never compares equal to
    # a list, so the original completeness check rejected EVERY input.
    # Compare as sets (order-independent) instead.
    if set(master.keys()) != {"host", "port", "key", "secret"}:
        raise Exception("Invalid Argument", self.args.master)
    return master
def ensure_master(self):
    '''
    Return the locally stored master details as an AttrDict;
    print an error and exit the process when none are stored.
    '''
    master = self.state['master']
    if master:
        return AttrDict(master)
    prRed('No master details stored locally')
    sys.exit(1)
def _init_logaggfs_paths(self, logaggfs_dir):
    """
    Logaggfs directories and file initialization

    >>> test_dir = utils.ensure_dir('/tmp/xyz')
    >>> trackfile = open(test_dir+'/trackfiles.txt', 'w+'); trackfile.close()
    >>> lc = collector = LogCollector('localhost', '1088', None, test_dir, test_dir)
    >>> lc._init_logaggfs_paths(test_dir)
    AttrDict({'logcache': '/tmp/xyz', 'logs_dir': '/tmp/xyz/logs', 'trackfiles': '/tmp/xyz/trackfiles.txt'})
    >>> import shutil; shutil.rmtree(test_dir)
    """
    abspath, join = os.path.abspath, os.path.join
    paths = AttrDict()
    paths.logcache = logaggfs_dir
    paths.logs_dir = abspath(join(logaggfs_dir, "logs"))
    paths.trackfiles = abspath(join(logaggfs_dir, "trackfiles.txt"))
    return paths
def collect(self):
    # Command entry point: parse the master argument, build the
    # collector and serve its API over tornado (blocks forever).
    #
    # Parse `host=...:port=...:key=...:secret=...` (order-independent).
    # NOTE(review): a part with no '=' (e.g. bare "host") is silently
    # accepted with the key used as the value — confirm whether this
    # should raise InvalidArgument instead.
    master = AttrDict()
    try:
        m = self.args.master.split(':')
        # So that order of keys is not a factor
        for a in m:
            a = a.split('=')
            if a[0] == 'host':
                master.host = a[-1]
            elif a[0] == 'port':
                master.port = a[-1]
            elif a[0] == 'key':
                master.key = a[-1]
            elif a[0] == 'secret':
                master.secret = a[-1]
            else:
                raise ValueError
    except ValueError:
        raise InvalidArgument(self.args.master)

    # Create collector object
    collector = LogCollector(self.args.data_dir,
                             self.args.logaggfs_dir, master, self.log)
    collector_api = CollectorService(collector, self.log)
    api = API()
    api.register(collector_api, 'v1')

    # Route every /logagg/* request through the shared RequestHandler.
    app = tornado.web.Application([
        (r'^/logagg/.*', RequestHandler, dict(api=api)),
    ])
    app.listen(self.args.port)
    # Blocks until the IOLoop is stopped.
    tornado.ioloop.IOLoop.current().start()
def get_channelname(self, _id):
    """
    Resolve a slack channel ("C…"), group ("G…") or direct-message
    ("D…") id to its human-readable name, consulting the local cache
    first.

    >>> from mock import Mock
    >>> def side_effect(value, channel='qwer'):
    ...     if value == 'channels.info':
    ...         return {'channel': {'name': 'asdf'}}
    ...     if value == 'groups.info':
    ...         return {"group": {'name': 'abcd'}}
    ...     if value == 'im.list':
    ...         return {'ims': [{'id': 'D1234', 'user': '******'}, {'id': 'D6521', 'user': '******'}]}
    >>> ob = SlackDump()
    >>> ob.slack.api_call = Mock(side_effect=side_effect)
    >>> ob.h.get_username = Mock(ob.h.get_username, return_value='just')
    >>> ob.get_channelname('G1234')
    'abcd'
    >>> ob.get_channelname('C1234')
    'asdf'
    >>> ob.get_channelname('D1234')
    'just'
    """
    name = AttrDict(self.channel_name_cache)
    # Cache lookup/store must be keyed by the actual id. The original
    # used `name.id` — the literal key "id" — so every channel shared a
    # single cache slot and cache hits could return the wrong name.
    if _id in name:
        return name[_id]
    if _id.startswith("C"):
        info = self.slack.api_call("channels.info", channel=_id)
        name[_id] = info["channel"]["name"]
    elif _id.startswith("G"):
        info = self.slack.api_call("groups.info", channel=_id)
        name[_id] = info["group"]["name"]
    elif _id.startswith("D"):
        # Direct message: find the IM entry and use the user's name.
        ch_list = self.slack.api_call("im.list")["ims"]
        for im in ch_list:
            if im["id"] == _id:
                name[_id] = self.h.get_username(im["user"])
    # NOTE(review): `name` is built from self.channel_name_cache; whether
    # the write above persists into the cache depends on AttrDict
    # copy-vs-wrap semantics — confirm.
    return name[_id]
def parse_master_args(self, master_arg):
    '''
    Parse master arguments
    '''
    # Running without a master: nothing to parse.
    if self.args.no_master:
        return None

    master = AttrDict()
    try:
        # Key order in the argument string does not matter.
        for part in self.args.master.split(':'):
            kv = part.split('=')
            key, value = kv[0], kv[-1]
            if key == 'host':
                master.host = value
            elif key == 'port':
                master.port = value
            elif key == 'cluster_name':
                master.cluster_name = value
            elif key == 'cluster_passwd':
                master.cluster_passwd = value
            else:
                raise ValueError
    except ValueError:
        raise Exception('Invalid argument arg: {}'.format(
            self.args.master))
    return master
def get_history(self, slack, _id, _name, end_ts, start_ts=0):
    """
    Fetch message history for one channel between start_ts and end_ts,
    parse each message and persist it; returns the number of messages
    written.

    >>> from mock import Mock
    >>> ob = SlackHistory()
    >>> ob.slack.api_call = Mock(ob.slack.api_call)
    >>> ob.slack.api_call.side_effect= [{'messages' :[{'message': 'Dummy <@123>', 'ts': '123.234'}], 'has_more': True}, {'messages' :[{'message': 'Dummy <@123>', 'ts': '122.234'}], 'has_more': False}]
    >>> ob.parse_dict = Mock(ob.parse_dict, return_value={'message': 'Dummy @asdf', 'ts': '123.234'})
    >>> ob._write_messages = Mock()
    >>> ob.get_history('users.info', '1234', 'general', 12345)
    2
    """
    messages = []
    ts = end_ts
    num = 0
    while True:
        # Slack history call: up to 1000 messages older than `ts`.
        response = self.slack.api_call(
            slack, channel=_id, latest=ts, oldest=start_ts, count=1000
        )
        if "messages" not in response:
            return num
        messages.extend(response["messages"])
        # Oldest-first so downstream storage sees chronological order.
        messages = sorted(messages, key=itemgetter("ts"))
        for message in messages:
            msg = AttrDict(message)
            msg.channel = _id
            msg.channel_name = _name
            msg = self.parse_dict(msg)
            self._write_messages(dict(msg))
        num += len(messages)
        if response["has_more"]:
            # NOTE(review): after the ascending sort, messages[-1] holds
            # the NEWEST ts; passing it as `latest` looks like it would
            # re-fetch the same window. Backwards pagination usually
            # uses the OLDEST ts — confirm against the Slack API.
            ts = messages[-1]["ts"]
            messages = []
        else:
            return num
def _update_topic_components(self, topic_name):
    '''
    Read one pass of the heartbeat stream for `topic_name` from the
    nsq api and upsert each component's details (keyed on host:port)
    into the database, then sleep before the caller's next pass.
    '''
    topic_info = self.topic_collection.find_one({'topic_name': topic_name})
    topic = topic_info['heartbeat_topic']
    nsqd_tcp_address = topic_info['nsqd_tcp_address']
    nsqd_http_address = topic_info['nsqd_http_address']
    nsq_api_address = topic_info['nsq_api_address']

    url = self.NSQ_API_URL.format(nsq_api_address=nsq_api_address,
                                  nsqd_tcp_address=nsqd_tcp_address,
                                  nsqd_http_address=nsqd_http_address,
                                  topic=topic,
                                  empty_lines='no')

    # Initialize so the KeyboardInterrupt handler below cannot hit a
    # NameError when the interrupt fires before requests.get() returns.
    resp = None
    try:
        self.log.info("updating_components", topic=topic_name)
        resp = requests.get(url, stream=True)
        for heartbeat in resp.iter_lines():
            # One JSON heartbeat per line, identifying a component by
            # its host and port.
            heartbeat = AttrDict(json.loads(heartbeat.decode('utf-8')))
            host = heartbeat.host
            port = heartbeat.port
            self.component_collection.update_one(
                {
                    'host': host,
                    'port': port
                }, {'$set': heartbeat},
                upsert=True)
    except requests.exceptions.ConnectionError:
        self.log.warn('cannot_request_nsq_api___will_try_again', url=url)
    except KeyboardInterrupt:
        # Close the streaming response if it was ever opened.
        if resp is not None:
            resp.close()
        sys.exit(0)
    # Throttle before the next update pass.
    time.sleep(self.UPDATE_COMPONENTS_INTERVAL)
def real_time_message(self):
    """Poll the slack RTM socket forever, parsing and persisting each event."""
    self.log.info("completed history")
    while True:
        events = self.slack.rtm_read()
        if not events:
            # Nothing on the wire; back off briefly before polling again.
            time.sleep(self.EMPTY_READ_SLEEP_INTERVAL)
            continue
        if len(events[0]) == 1:
            # Single-field events carry no message payload; skip them.
            continue
        message = AttrDict(events[0])
        self.log.debug("Message recieved", msg=message)
        parsed = self.parse_message(message)
        self.h._write_messages(dict(parsed))
def register_component(self, namespace: str, topic_name: str, host: str, port: str) -> dict: ''' Validate auth details and store details of component in database Sample url: 'http://localhost:1088/logagg/v1/register_component?namespace=master&topic_name=logagg&host=78.47.113.210&port=1088' ''' # find for topic or create one topic = self.master.topic_collection.find_one( {'topic_name': topic_name}) if not topic: create_topic_result = self._create_topic(topic_name) if topic or create_topic_result.get('success'): component = { 'namespace': namespace, 'host': host, 'port': str(port), 'topic_name': topic_name } component_info = AttrDict(component) try: topic_name = component_info.topic_name namespace = component_info.namespace host = component_info.host port = component_info.port self.master.component_collection.update_one( { 'topic_name': topic_name, 'namespace': namespace, 'host': host, 'port': port }, {'$set': component}, upsert=True) return {'success': True} except pymongo.errors.DuplicateKeyError as dke: return { 'success': True, 'details': 'Duplicate component details' } else: return create_topic_result
def _resolve_call_info(self, request):
    """
    Resolve the API function, namespace and parameters for `request`.

    Parses the URL path into (version, namespace, function), binds a
    request logger, deserializes query-string/POST parameters into
    `request.fn_params`, and returns an AttrDict describing the call.

    :raises UnknownAPIFunction: when the path names no registered function.
    """
    r = AttrDict()
    r.time_deserialize = 0.0
    r.namespace = None
    r.function = None
    r.method = "GET"

    urlp = urlparse(request.url)
    path_parts = urlp.path.lstrip("/").split("/")
    path_parts = path_parts[1:]  # ignore "/api/" part

    version = path_parts[0]
    fn_name = path_parts[-1]
    path_parts = path_parts[:-1]
    r.function = fn_name

    # First segment may be an API version; otherwise fall back to the
    # default version and treat the whole remaining path as namespace.
    if self.api.isversion(version):
        namespace = "/".join(path_parts[1:])
    else:
        version = self.api.get_default_version()
        namespace = "/".join(path_parts)

    namespace = namespace if namespace else None
    r.namespace = namespace or ""

    request.log = self.log.bind(
        __requestid=request.id,
        namespace=r.namespace,
        function=fn_name,
        apiid=self.api._id,
    )

    query_string = urlp.query
    request.fn_name = fn_name

    if not self.api.has_api_fn(fn_name, version, namespace):
        raise UnknownAPIFunction(fn_name)

    fninfo = self.api.get_api_fn(fn_name, version, namespace)
    request.fn = fninfo["obj"]
    info = fninfo["info"]
    params = info["params"]

    # parse function arguments from the request
    param_vals = dict((k, v[0]) for k, v in parse_qs(query_string).items())
    for key, val in param_vals.items():
        try:
            # Parameters declared with the value's own type (str from
            # parse_qs) are passed through without literal_eval.
            if params.get(key, {}).get("type", "") == type(val):
                continue
            param_vals[key] = ast.literal_eval(val)
        except:  # FIXME: bald except!
            param_vals[key] = val

    if request.method == "POST":
        r.method = "POST"
        protocol = self._find_request_protocol(request)

        # A parameter declared as a Generator receives the body as a
        # lazily deserialized stream instead of a parsed mapping.
        for stream_param in params:
            try:
                if params[stream_param]["type"] == typing.Generator:
                    break
            except AttributeError:
                continue
        else:
            stream_param = None

        # FIXME: request.body: what type is it supposed to be? byte string or file like?
        if not stream_param:
            t = time.time()
            p = protocol.deserialize(request.body)
            r.deserialize_time = time.time() - t
            # Reuse the payload deserialized above; the original called
            # protocol.deserialize(request.body) a second time here,
            # doing the work twice and discarding `p`.
            param_vals.update(p)
        else:
            param_vals[stream_param] = protocol.deserialize_stream(
                request.body)

    if info.get("req", None):
        param_vals["req"] = request

    request.fn_params = param_vals
    return r
def run(self):
    # Entry point for the master command: parse the auth and mongodb
    # argument strings, then serve the master API over tornado
    # (blocks forever).
    port = self.args.port
    host = self.args.host

    # Parse `key=...:secret=...` auth argument.
    auth = AttrDict()
    try:
        m = self.args.auth.split(':')
        # So that order of keys is not a factor
        for a in m:
            a = a.split('=')
            if a[0] == 'key':
                auth.key = a[-1]
            elif a[0] == 'secret':
                auth.secret = a[-1]
            else:
                raise ValueError
    except ValueError:
        raise InvalidArgument(self.args.auth)

    # Parse `host=...:port=...:user=...:passwd=...:db=...` mongodb
    # argument. Note: the 'db' key is stored as `mongodb.name`.
    mongodb = AttrDict()
    try:
        m = self.args.mongodb.split(':')
        for a in m:
            a = a.split('=')
            if a[0] == 'host':
                mongodb.host = a[-1]
            elif a[0] == 'port':
                mongodb.port = a[-1]
            elif a[0] == 'user':
                mongodb.user = a[-1]
            elif a[0] == 'passwd':
                mongodb.passwd = a[-1]
            elif a[0] == 'db':
                mongodb.name = a[-1]
            else:
                raise ValueError
    except ValueError:
        raise InvalidArgument(self.args.mongodb)

    # Create LogaggService object
    ls = Master(host, port, mongodb, auth, self.log)
    master_api = MasterService(ls, self.log)
    api = API()
    api.register(master_api, 'v1')

    # Route every /logagg/* request through the shared RequestHandler.
    app = tornado.web.Application([
        (r'^/logagg/.*', RequestHandler, dict(api=api)),
    ])
    app.listen(self.args.port)
    # Blocks until the IOLoop is stopped.
    tornado.ioloop.IOLoop.current().start()
def _resolve_call_info(self, request):
    """
    Resolve the API function, namespace and parameters for `request`.

    Returns a 2-tuple (payload, call-info AttrDict): for `apidoc`
    requests the payload is the generated API documentation, otherwise
    it is the request itself with `fn`, `fn_name` and `fn_params`
    attached.

    :raises UnknownAPIFunction: when the path names no registered function.
    """
    r = AttrDict()
    r.time_deserialize = 0.0
    r.namespace = None
    r.function = None
    r.method = 'GET'

    urlp = urlparse(request.url)
    path_parts = urlp.path.lstrip('/').split('/')
    path_parts = path_parts[1:]  # ignore "/api/" part

    version = path_parts[0]
    fn_name = path_parts[-1]
    path_parts = path_parts[:-1]
    r.function = fn_name

    # First segment may be an API version; otherwise fall back to the
    # default version and treat the whole remaining path as namespace.
    if self.api.isversion(version):
        namespace = '/'.join(path_parts[1:])
    else:
        version = self.api.get_default_version()
        namespace = '/'.join(path_parts)

    namespace = namespace if namespace else None
    r.namespace = namespace or ''

    # Propagate the network path header for cross-service tracing.
    request.netpath = '{netpath}=>{id}_{reqid}({namespace}/{function})'.format(
        netpath=request.headers.get(NETPATH_HEADER, ''),
        id=self.api.id,
        reqid=request.id,
        namespace=r.namespace,
        function=fn_name,
    )
    request.log = self.log.bind(__netpath=request.netpath)

    # "apidoc" is a built-in pseudo-function returning the API docs.
    request.apidoc = False
    if fn_name == 'apidoc':
        request.apidoc = True
        return self.api.doc(version, namespace), r

    query_string = urlp.query
    request.fn_name = fn_name

    if not self.api.has_api_fn(fn_name, version, namespace):
        raise UnknownAPIFunction(fn_name)

    fninfo = self.api.get_api_fn(fn_name, version, namespace)
    request.fn = fninfo['obj']
    info = fninfo['info']
    params = info['params']

    # parse function arguments from the request
    param_vals = dict((k, v[0])
                      for k, v in parse_qs(query_string).items())
    for key, val in param_vals.items():
        try:
            param_vals[key] = ast.literal_eval(val)
        except:  # FIXME: bald except!
            param_vals[key] = val

    if request.method == 'POST':
        r.method = 'POST'
        protocol = self._find_request_protocol(request)

        # A parameter annotated as a Generator receives the body as a
        # lazily deserialized stream instead of a parsed mapping.
        for stream_param in params:
            try:
                if issubclass(params[stream_param]['type'],
                              typing.Generator):
                    break
            except AttributeError:
                continue
        else:
            stream_param = None

        # FIXME: request.body: what type is it supposed to be? byte string or file like?
        if not stream_param:
            t = time.time()
            p = protocol.deserialize(request.body)
            r.deserialize_time = time.time() - t
            # Reuse the payload deserialized above; the original called
            # protocol.deserialize(request.body) a second time here,
            # doing the work twice and discarding `p`.
            param_vals.update(p)
        else:
            param_vals[stream_param] = protocol.deserialize_stream(
                request.body)

    if info.get('req', None):
        param_vals['req'] = request

    request.fn_params = param_vals
    return request, r