def insert_userinfo():
    '''Store userinfo into database by reading a userid(s) from a text file
    or a standard input.

    This command will read a userid(s) and do ldapsearch to find userinfo.
    And then it will store the userinfo into mysql database.

    Usage:
        $ fg-metrics-utility insert_userinfo -i filename [hostname]
        or
        $ fg-metrics-utility insert_userinfo userid [hostname]
    '''
    # NOTE(review): the body calls self.retrieve_userinfo_ldap() but the
    # signature has no `self` parameter -- as written those paths raise
    # NameError. Presumably this was lifted out of a class; confirm and
    # restore the method context (or pass the LDAP helper in explicitly).
    i = FGInstances()
    filename = ""
    userid = ""
    ownerid = ""
    username = ""
    project = ""
    # Guard: must be invoked as "fg-metrics-utility insert_userinfo ..."
    # with at least one argument after the subcommand.
    if len(sys.argv) < 3 or sys.argv[1] != "insert_userinfo":
        print "usage: ./$ fg-metrics-utility insert_userinfo -i filename [hostname] \n\
or \n\
$ fg-metrics-utility insert_userinfo userid [hostname]"
        return
    # NOTE(review): both branches below index sys.argv[3]/sys.argv[4]
    # without a length check -- a short command line raises IndexError
    # here even though the usage text marks [hostname] as optional.
    if sys.argv[2] == "-i":
        filename = sys.argv[3]
        hostname = sys.argv[4]
    else:
        userid = sys.argv[2]
        hostname = sys.argv[3]
    if os.path.exists(filename):
        # Batch mode: one ownerid (optionally a comma-separated record)
        # per line of the input file.
        # NOTE(review): the file handle is never closed; consider `with`.
        f = open(filename, "r")
        while 1:
            line = f.readline()
            if not line:
                break
            ownerid = line.rstrip()
            # For comma seperated lines
            # E.g. 5TQVNLFFHPWOH22QHXERX,hyunjoo,fg45
            # Ownerid, username, projectid
            m = re.search(r'(.*),(.*),(.*)', line.rstrip())
            if m:
                try:
                    userid = m.group(1)
                    username = m.group(2)
                    project = m.group(3)
                except:
                    # NOTE(review): bare except; group() cannot fail once
                    # the regex matched, so this is effectively dead.
                    m = None
                    pass
            # In euca3.0+, username is an ownerid of past version of
            # euca
            if username:
                ownerid = username
            res = self.retrieve_userinfo_ldap(ownerid)
            if res:
                if m:
                    # if m exists, res (dict) should be merged with the
                    # comma separated values in order to store the info
                    # into db
                    res["ownerid"] = userid
                    res["username"] = username
                    res["project"] = project
                if hostname:
                    res["hostname"] = hostname
                print res
                i.userinfo_data.append(res)
    else:
        # Single-userid mode: look up the one id given on the command line.
        res = self.retrieve_userinfo_ldap(userid)
        if res:
            i.userinfo_data.append(res)
    i.write_userinfo_to_db()
class FGLogParser:
    """Parse Eucalyptus cluster-controller log files (cc.log) and load the
    extracted records into the metrics database via FGInstances.

    Driver sequence used by the surrounding tool:
    set_argparser() -> check_argparser() -> get_cloudplatform_info() ->
    read_logs() -> store_parsed().
    """

    # Class-level defaults; check_argparser()/__init__ overwrite per instance.
    debug = False      # verbose debug output toggle (-d / --debug)
    progress = True    # show a percentage progress meter while parsing
    args = None        # argparse.Namespace produced by set_argparser()
    instances = None   # FGInstances container collecting parsed records

    def __init__(self):
        self.instances = FGInstances()

    def convert_data_to_list(self, data, attribute):
        """Convert data[attribute] from a delimiter-wrapped, space-separated
        string into a Python list of strings, in place.

        NOTE(review): builds source text and eval()s it; only safe while the
        log content is trusted input.
        """
        rest = data[attribute]
        rest = re.sub(" ", "' , '", rest)
        # Drop the original wrapping characters and re-wrap as a list literal.
        rest = "['" + rest[1:-1] + "']"
        restdata = eval(rest)
        data[attribute] = restdata

    def convert_data_to_dict(self, data, attribute):
        """Convert data[attribute] from a "{k=v k=v ...}"-style string into a
        Python dict, in place (see convert_str_to_dict_str for the rewrite).

        NOTE(review): eval() on log-derived text -- trusted input only.
        """
        rest = data[attribute]
        rest = self.convert_str_to_dict_str(rest[1:-1])
        restdata = eval(rest)
        data[attribute] = restdata

    def convert_str_to_dict_str(self, line):
        """Rewrite a "k=v k=v ..." fragment into the text of a Python dict
        literal ("{'k' : 'v', ...}") suitable for eval()."""
        line = re.sub(' +', ' ', line)
        line = line.strip(" ")
        line = re.sub(',', '%2C', line)  # , value converts '%2C'
        line = re.sub(' ', ',', line)  # more regular dict
        line = re.sub('=', '\'=\'', line)
        line = re.sub(',', '\',\'', line)
        line = re.sub('=', ' : ', line)
        line = re.sub('%2C', ',', line)  # Back to , value
        return '{\'' + line + '\'}'

    def parse_type_and_date(self, line, data):
        """Split a log line of the form "[date][id][msgtype] rest" into the
        given data dict (keys: date, id, msgtype, linetype) and return the
        remainder of the line, or None when the line should be ignored.
        """
        # split line after the third ] to (find date, id, msgtype)
        # put the rest in the string "rest"
        try:
            m = re.search(
                r'\[(.*)\]\[(.*)\]\[(.*)\](.*)', line, re.M | re.I)
            data['date'] = datetime.strptime(
                m.group(1), '%a %b %d %H:%M:%S %Y')
            # Normalize the parsed timestamp from args.timezone to EST.
            data['date'] = fgmetric.util.FGTimeZone.convert_timezone(
                data['date'], self.args.timezone, "EST")
            data['id'] = m.group(2)
            data['msgtype'] = m.group(3)
            rest = m.group(4)
            rest = re.sub(' +}', '}', rest).strip()
            if rest.startswith("running"):
                data['linetype'] = "running"
                return rest
            elif rest.startswith("calling"):
                data['linetype'] = "calling"
                return rest
            else:
                # Everything else looks like "funcname(): payload"; the part
                # before ':' (minus the "()") becomes the linetype.
                location = rest.index(":")
                linetype = rest[0:location]
                data['linetype'] = re.sub('\(\)', '', linetype).strip()
                rest = rest[location+1:].strip()
                return rest
        except (ValueError, AttributeError):
            # No regex match (m is None) or no ':' in the remainder.
            data['linetype'] = "IGNORE"
            return
        except:
            data['linetype'] = "IGNORE"
            # print sys.exc_info()
            return

    def ccInstance_parser(self, rest, data):
        """parses the line and returns a dict"""
        # replace print_ccInstance(): with linetype=print_ccInstance
        # rest = rest.replace("print_ccInstance():","linetype=print_ccInstance")
        # replace refreshinstances(): with calltype=refresh_instances
        # RunInstances():
        rest = rest.replace(
            "RunInstances():", "calltype=run_instances")  # removing multiple spaces
        rest = rest.replace(
            "refresh_instances():", "calltype=refresh_instances")  # removing multiple spaces
        # separate easy assignments from those that would contain groups, for now simply put groups as a string
        # all others are merged into a string with *=* into rest
        m = re.search(
            r'(.*)keyName=(.*)ccnet=(.*)ccvm=(.*)ncHostIdx=(.*)volumes=(.*)groupNames=(.*)', rest, re.M | re.I)
        # Version 3.0.2
        # Deleted: emiId, kernelId, ramdiskId, emiURL, kernelURL and ramdiskURL
        # Added: accountId, platform, and bundleTaskStateName
        # Changed: value of ownerId is changed
        try:
            data['keyName'] = m.group(2).strip()
            data["ccnet"] = m.group(3).strip()
            data["ccvm"] = m.group(4).strip()
            data["volumes"] = m.group(6).strip()
            data["groupNames"] = m.group(7).strip()
            # assemble the rest string
            rest = m.group(1) + "ncHostIdx=" + m.group(5)
        except:
            # Regex did not match (m is None): not a parsable ccInstance line.
            return
        # GATHER ALL SIMPLE *=* assignments into a single rest line and add
        # each entry to dict via eval
        rest = self.convert_str_to_dict_str(rest)
        try:
            restdata = eval(rest)
        except:
            print "eval failed:(" + str(sys.exc_info()[0]) + "), (" + str(rest) + ")"
            return
        data.update(restdata)
        # convert ccvm and ccnet to dict
        self.convert_data_to_dict(data, "ccvm")
        self.convert_data_to_dict(data, "ccnet")
        # converts volumes and groupNAmes to list
        self.convert_data_to_list(data, "groupNames")
        self.convert_data_to_list(data, "volumes")
        # convert the timestamp
        data["ts"] = datetime.fromtimestamp(int(data["ts"]))
        return data

    def refresh_resource_parser(self, rest, data):
        """Parse a refresh_resources() payload into data (node, mem, disk,
        cores plus their *_max capacities); marks calltype "ignore" when the
        line is not a "received data from ..." report. Returns data."""
        #[Wed Nov 9 19:50:08 2011][008128][EUCADEBUG ] refresh_resources(): received data from node=i2 mem=24276/22740 disk=306400/305364 cores=8/6
        if (rest.find("received") > -1):
            rest = re.sub("received data from", "", rest).strip()
            # node=i2 mem=24276/22740 disk=306400/305364 cores=8/6
            m = re.search(
                r'node=(.*) mem=(.*)[/](.*) disk=(.*)/(.*) cores=(.*)/(.*)', rest, re.M | re.I)
            data["node"] = m.group(1)
            data["mem"] = m.group(2)
            data["mem_max"] = m.group(3)
            data["disk"] = m.group(4)
            data["disk_max"] = m.group(5)
            data["cores"] = m.group(6)
            data["cores_max"] = m.group(7)
        else:
            data["calltype"] = "ignore"
        return data

    def terminate_instances_param_parser(self, rest, data):
        """Parse a TerminateInstances "params: ..." payload into data
        (userId, instIdsLen, firstInstId); marks calltype "ignore" for other
        payloads. Returns data."""
        rest = rest.strip()
        if rest.startswith("params"):
            # params: userId=(null), instIdsLen=1, firstInstId=i-417B07B2
            rest = re.sub("params:", "", rest).strip()
            # node=i2 mem=24276/22740 disk=306400/305364 cores=8/6
            m = re.search(
                r'userId=(.*) instIdsLen=(.*) firstInstId=(.*)', rest, re.M | re.I)
            userid = m.group(1)
            # Eucalyptus prints "(null)," for an absent user id.
            if userid == "(null),":
                data["userId"] = "null"
            else:
                data["userId"] = m.group(1)
            data["instIdsLen"] = m.group(2)
            data["firstInstId"] = m.group(3)
        else:
            data["calltype"] = "ignore"
        return data

    def print_counter(self, label, counter):
        """Print a "label = value" summary line."""
        print label + " = " + str(counter)

    def set_argparser(self):
        """Build the command-line parser, parse sys.argv and store the
        resulting namespace in self.args."""
        def_s_date = "19700101"
        def_e_date = "29991231"
        # NOTE(review): def_conf is defined but never used (the --conf
        # option below has no default).
        def_conf = "futuregrid.cfg"
        def_linetypes = [
            "TerminateInstances", "refresh_resources", "print_ccInstance"]
        def_platform = "eucalyptus"
        def_platform_version = "3.0.2"
        parser = argparse.ArgumentParser()
        parser.add_argument(
            "-s", "--from", dest="from_date", default=def_s_date,
            help="start date to begin parsing (type: YYYYMMDD)")
        parser.add_argument("-e", "--to", dest="to_date", default=def_e_date,
                            help="end date to finish parsing (type: YYYYMMDD)")
        parser.add_argument("-i", "--input_dir", dest="dirname", required=True,
                            help="Absolute path where the files (e.g. 2012-02-16-00-21-17-cc.log generated by fg-unix) exist")
        parser.add_argument("--conf", dest="conf",
                            help="configuraton file of the database to be used")
        parser.add_argument(
            "-l", "--parse", nargs="+", dest="linetypes", default=def_linetypes,
            help="specify function names which you want to parse (types: print_ccInstance, refresh_resources)")
        parser.add_argument("-z", "--gzip", action="store_true", default=False,
                            help="gzip compressed files will be loaded")
        parser.add_argument(
            "-d", "--debug", action="store_true", default=False,
            help="debug on|off")
        parser.add_argument("-p", "--platform", default=def_platform,
                            help="Cloud platform name, required. (e.g. nimbus, openstack, eucalyptus, etc)")
        parser.add_argument(
            "-pv", "--platform_version", default=def_platform_version,
            help="Cloud platform version. (e.g. 2.9 for nimbus, essex for openstack, and 2.0 or 3.1 for eucalyptus)")
        parser.add_argument("-n", "--nodename", required=True,
                            help="Hostname of the cloud platform, required. (e.g., hotel, sierra, india, alamo, foxtrot)")
        # NOTE(review): the -tz help text below looks copy-pasted from the
        # --gzip option; it presumably should describe the timezone option.
        parser.add_argument(
            "-tz", "--timezone", dest="timezone", default="local()",
            help="gzip compressed files will be loaded")
        args = parser.parse_args()
        print args
        '''
        How we can use argparse in this file?
        -------------------------------------
        1) fg-parser.py -s start date -e end date; will parse logs between the period that specified by -s and -e options
        ex) fg-parser.py -s 20120216 -e 20120216 => 2012-02-16-00-21-17-cc.log ~ 2012-02-16-23-47-16-cc.log will be parsed
        2) fg-parser.py -f filename; Only parse the file that specified by -f option
        ex) fg-parser.py -f 2012-02-16-00-21-17-cc.log => Only that file will be parsed
        '''
        self.args = args

    def check_argparser(self):
        """Apply side effects of the parsed options: database config, gzip
        decompressor and the debug flag."""
        if self.args.conf:
            self.instances.db.set_conf(self.args.conf)
            self.instances.db.update_conf()
        if self.args.gzip:
            import zlib
            # NOTE(review): CHUNKSIZE is assigned but never used here.
            CHUNKSIZE = 1024
            # 16+MAX_WBITS: accept a gzip header when decompressing.
            self.gz = zlib.decompressobj(16+zlib.MAX_WBITS)
        if self.args.debug:
            self.debug = True

    def read_compressed_line(self, line):
        """Return the line decompressed via the gzip decompressor when --gzip
        was given, otherwise unchanged."""
        if self.args.gzip:
            return self.gz.decompress(line)
        else:
            return line

    def read_logs(self):
        """Dispatch to stdin parsing ("-" as input dir) or directory scan."""
        if self.args.dirname == "-":
            self.read_stdin()
        else:
            self.read_files()

    def read_files(self):
        """Parse every file in args.dirname whose name encodes a timestamp
        (YYYY-MM-DD-HH-MM-SS-cc.*) falling inside [from_date, to_date]."""
        from_date = datetime.strptime(
            self.args.from_date + " 00:00:00", '%Y%m%d %H:%M:%S')
        to_date = datetime.strptime(
            self.args.to_date + " 23:59:59", '%Y%m%d %H:%M:%S')
        dirname = self.args.dirname
        try:
            listdir = os.listdir(dirname)
        except:
            # Unreadable/missing directory: fall through with nothing to do.
            listdir = ""
        for filename in listdir:
            try:
                # The date is encoded in the filename before the first ".".
                single_date = datetime.strptime(str(
                    filename).split(".")[0], '%Y-%m-%d-%H-%M-%S-cc')
                if from_date <= single_date <= to_date:
                    print "Processing file is: " + filename
                    self.parse_log(
                        dirname + "/" + filename, self.instances.update_traceinfo)
            except (ValueError):
                # Filename does not match the timestamp pattern: skip it.
                print "error occured parsing for: " + filename
                self.debug_output(sys.exc_info())
                continue
            except:
                print "error occured parsing for: " + filename
                print sys.exc_info()
                self.debug_output(sys.exc_info())
                continue

    def read_stdin(self):
        """Parse log lines arriving on standard input (filename None)."""
        try:
            print "Processing stdin..."
            self.parse_log(None, self.instances.update_traceinfo)
        except:
            print sys.exc_info()
            pass

    def parse_log(self, filename, analyze):
        """Parse one log file (or stdin when filename is None) line by line,
        feed each successfully parsed print_ccInstance record to analyze(),
        and print per-linetype counters at the end.

        filename -- path to the log file, or None for stdin
        analyze  -- callback taking the parsed data dict
        """
        lines_total = lines_ignored = count_terminate_instances = count_refresh_resource = count_ccInstance_parser = read_bytes = 0
        parse_types = self.args.linetypes
        print filename
        if filename:
            file_size = os.path.getsize(filename)
            self.debug_output("SIZE>:" + str(file_size))
        for line in fileinput.input(filename, openhook=fileinput.hook_compressed):
            # line = self.read_compressed_line(line)
            line = line.rstrip()
            ignore = False
            lines_total += 1
            read_bytes += len(line)
            data = {}
            # Progress meter: update once every 1000 lines, file input only.
            if (self.debug or self.progress) and filename and ((lines_total % 1000) == 0):
                percent = int(100 * read_bytes / file_size)
                sys.stdout.write("\r%2d%%" % percent)
                sys.stdout.flush()
            # self.debug_output("DEBUG " + str(lines_total) +"> " + line)
            rest = self.parse_type_and_date(line, data)
            ''' Temporarily prince_ccInstance is only available to parse
            if data["linetype"] == "TerminateInstances" and "TerminateInstances" in parse_types:
                count_terminate_instances += 1
                terminate_instances_param_parser(rest, data)
            elif data["linetype"] == "refresh_resources" and "refresh_resources" in parse_types:
                count_refresh_resource += 1
                refresh_resource_parser(rest, data)
            el'''
            if data["linetype"] == "print_ccInstance" and "print_ccInstance" in parse_types:
                count_ccInstance_parser += 1
                if not self.ccInstance_parser(rest, data):
                    ignore = True
                else:
                    # cloudplatformid
                    data["cloudPlatformIdRef"] = self.cloudplatform_id
                    analyze(data)
            else:
                ignore = True
            if ignore:
                lines_ignored += 1
                # self.debug_output("IGNORED LAST LINE> ")
            # For Debugging to make it faster terminate at 5
            # if self.debug and (len(self.instances.data) > 5):
            #     break
        fileinput.close()
        self.print_counter("lines total", lines_total)
        self.print_counter("lines ignored = ", lines_ignored)
        self.print_counter(
            "count_terminate_instances", count_terminate_instances)
        self.print_counter("count_refresh_resource", count_refresh_resource)
        self.print_counter("count_ccInstance_parser ", count_ccInstance_parser)

    def store_parsed(self):
        """Write the collected instance and userinfo records to the database
        and print summary counts."""
        self.instances.db.connect()
        self.instances.write_to_db()
        self.instances.set_userinfo()
        self.instances.write_userinfo_to_db()
        self.print_counter("======================", "")
        self.print_counter("instance stored total", len(
            self.instances.instance))
        self.print_counter("userinfo stored total", len(
            self.instances.userinfo))

    def get_cloudplatform_info(self):
        """Look up and cache the cloud platform id for the platform/hostname/
        version given on the command line."""
        # NOTE(review): other code paths call set_conf()/update_conf();
        # confirm that db.conf() is the intended call here.
        self.instances.db.conf()
        self.instances.db.connect()
        whereclause = {"platform": self.args.platform,
                       "hostname": self.args.nodename,
                       "version": self.args.platform_version}
        self.cloudplatform_id = self.instances.get_cloudplatform_id(
            whereclause)

    def debug_output(self, msg):
        """Print msg only when debug mode is enabled."""
        if not self.debug:
            return
        print msg

    def test_file_read(self, filename):
        # NOTE(review): parse_log is called without the `self.` prefix and
        # with only two arguments -- this raises NameError when invoked.
        parse_log(filename, self.instances.update_traceinfo)
        self.instances.dump()

    def test_sql_read(self):
        """Read stored records back from the database and dump them."""
        self.instances.read_from_db()
        self.instances.dump()

    def test_sql_write(self, filename):
        # NOTE(review): both `parse_log` and `instances` are missing the
        # `self.` prefix -- this raises NameError when invoked.
        parse_log(filename, self.instances.update_traceinfo)
        instances.write_to_db()