# NOTE(review): the line below is a whitespace-collapsed script fragment. It
# starts partway through a function body (the `def` header and the code that
# builds `payload`, `week` and `id` are not visible here), so the exact nesting
# of the if/elif/else statements cannot be confirmed from this line alone.
#
# Function tail, as far as the statement order shows:
#   * returns True early for any request whose verb is not "GET";
#   * rewrites the first "/cms/cpt/Software/download/" in the request path to
#     "/cmssw/", splits the path on "/", and returns True when it has fewer
#     than 6 components or when items[3] is not "RPMS";
#   * takes the last path component as the package, strips an optional
#     "?query" part, and reads a "version=" query value into `cmspkg`
#     (default "apt"); returns True unless the package ends with ".rpm";
#   * accepts two path layouts: "cgi-bin/cmspkg*" (needs at least 8 components;
#     repo=items[4], arch=items[5]; dev=1 when items[2] ends with "-dev") and
#     "cmssw" (repo=items[2], arch=items[4]); any other layout returns True;
#   * builds `xpayload` from the URL-unquoted values (the package name is the
#     text before the first "-1-"), copies "@timestamp" and "ip" from
#     `payload`, and returns the result of send_payload() for the index
#     "cmspkg-access-<week>" and document type "rpm-packages".
# NOTE(review): `items[3]="PRMS"` is followed directly by a check against
# "RPMS", so "apt" requests appear to be dropped -- this looks like a typo for
# "RPMS"; confirm the intent before changing it.
# NOTE(review): `from urllib import unquote` and the `print` statement are
# Python 2 only.
#
# Script tail: counts processes matching es_cmsrep_apache.py with pgrep and
# exits with status 0 when more than one is found (presumably a guard against
# concurrent runs -- confirm), lists the non-gzipped /var/log/httpd/access_log*
# files oldest-first (ls -rt), hands them to logwatch(...).process() together
# with `process` (presumably the function whose tail is shown here -- confirm),
# and prints the second value returned by that call.
if payload["verb"] != "GET": return True items = payload["request"].replace("/cms/cpt/Software/download/","/cmssw/",1).split("/") if len(items)<6: return True if items[3] == "apt": items[3]="PRMS" if items[3] != "RPMS": return True pkg, cmspkg, arch, repo, dev = items[-1], "apt", "" , "", 0 if "?" in pkg: pkg, pkgopts = pkg.split("?",1) if "version=" in pkgopts: cmspkg = pkgopts.split("version=",1)[1].split("&",1)[0] if not pkg.endswith(".rpm"): return True if (items[1] == "cgi-bin") and items[2].startswith("cmspkg"): if len(items)<8: return True if items[2].endswith('-dev'): dev=1 repo, arch = items[4], items[5] elif items[1] == "cmssw": repo, arch = items[2], items[4] else: return True from urllib import unquote xpayload = {'dev' : dev, 'repository' : unquote(repo), 'architecture' : unquote(arch), 'package' : unquote(pkg).split("-1-",1)[0], 'cmspkg' : unquote(cmspkg)} for x in ["@timestamp","ip"]: xpayload[x] = payload[x] return send_payload("cmspkg-access-"+week,"rpm-packages", id, dumps(xpayload), passwd_file="/data/es/es_secret") count=run_cmd("pgrep -l -x -f '^python .*/es_cmsrep_apache.py$' | wc -l",False) if int(count)>1: exit(0) logs = run_cmd("ls -rt /var/log/httpd/access_log* | grep -v '[.]gz$'").split("\n") log = logwatch("httpd",log_dir="/data/es") s,c=log.process(logs, process) print "Total entries processed",c
# NOTE(review): the line below is a whitespace-collapsed script fragment. It
# starts partway through a function body (the `def` header and the code that
# creates `items`, `line`, `payload` and `count` are not visible here), so the
# extent of each `if` body cannot be confirmed from this line alone.
#
# Function tail, as far as the statement order shows:
#   * parses items[3] (minus its first character) with the format
#     '%d/%b/%Y:%H:%M:%S' and converts it to epoch seconds via mktime()
#     (presumably time.mktime, which interprets the value as local time --
#     confirm against the imports);
#   * derives `week` as the number of whole 7-day periods in that epoch value
#     and stores the time in milliseconds under "@timestamp";
#   * optionally records "referrer" (items[10] without its first and last
#     character), "ip" (items[11], only when it is a double-quoted dotted
#     number string), and "agent"/"agent_type" (items[12:] joined with spaces,
#     double quotes removed; the type is the upper-cased text before the first
#     "/" after replacing spaces with "-");
#   * uses the SHA-1 hex digest of `line` as the document id, prints a progress
#     message when `count` is a multiple of 1000, and returns the result of
#     send_payload() for the index "apache-cmsdoxygen-<week>" and document type
#     "access_log".
# NOTE(review): the regex literal contains `\.` in a non-raw string; a raw
# string (r'...') would avoid invalid-escape warnings on newer Pythons.
# NOTE(review): `id` shadows the builtin of the same name, and `count` is
# reused at module level below for the pgrep result.
# NOTE(review): the `print` statements are Python 2 only.
#
# Script tail: counts processes matching es_cmsdoxygen_apache.py with pgrep and
# exits with status 0 when more than one is found (presumably a guard against
# concurrent runs -- confirm), lists the non-gzipped
# /var/log/httpd/sdt-access_log* files oldest-first (ls -rt), hands them to
# logwatch(...).process() together with `process` (presumably the function
# whose tail is shown here -- confirm), and prints the second value returned by
# that call.
tsec = mktime( datetime.strptime(items[3][1:], '%d/%b/%Y:%H:%M:%S').timetuple()) week = str(int(tsec / (86400 * 7))) payload["@timestamp"] = int(tsec * 1000) if len(items) > 10: payload["referrer"] = items[10][1:-1] if len(items) > 11 and re.match('^"[0-9]+(\.[0-9]+)+"$', items[11]): payload["ip"] = items[11][1:-1] if len(items) > 12: agent = " ".join(items[12:]).replace('"', '') payload["agent"] = agent payload["agent_type"] = agent.replace(" ", "-").split("/", 1)[0].upper() id = sha1(line).hexdigest() if (count % 1000) == 0: print "Processed entries", count return send_payload("apache-cmsdoxygen-" + week, "access_log", id, dumps(payload), passwd_file="/data/es/es_secret") count = run_cmd("pgrep -l -x -f '^python .*/es_cmsdoxygen_apache.py$' | wc -l", False) if int(count) > 1: exit(0) logs = run_cmd( "ls -rt /var/log/httpd/sdt-access_log* | grep -v '[.]gz$'").split("\n") log = logwatch("httpd", log_dir="/data/es") s, c = log.process(logs, process) print "Total entries processed", c
# NOTE(review): the line below is a whitespace-collapsed script fragment. It
# starts partway through a function body, inside a conditional whose opening
# `if` is not visible (the `elif items[1] == "cmssw"` below has no visible
# partner). The code that creates `items`, `pkg`, `cmspkg`, `dev`, `payload`,
# `week` and `id` is also out of view, so the nesting cannot be confirmed.
#
# Function tail, as far as the statement order shows:
#   * in the first (not fully visible) branch: returns True when the path has
#     fewer than 8 components, sets dev=1 when items[2] ends with "-dev", and
#     takes repo=items[4], arch=items[5];
#   * in the "cmssw" branch: takes repo=items[2], arch=items[4];
#   * any other layout returns True;
#   * builds `xpayload` from the URL-unquoted values (the package name is the
#     text before the first "-1-"), copies "@timestamp" and "ip" from
#     `payload`, and returns the result of send_payload() for the index
#     "cmspkg-access-<week>" and document type "rpm-packages".
#
# Script tail: counts processes matching es_cmsrep_apache.py with pgrep and
# exits with status 0 when more than one is found (presumably a guard against
# concurrent runs -- confirm), lists the non-gzipped
# /var/log/httpd/cmsrep-non-ssl_access.log* files oldest-first (ls -rt), hands
# them to logwatch(...).process() together with `process` (presumably the
# function whose tail is shown here -- confirm), and prints the second value
# returned by that call.
# NOTE(review): the pgrep pattern names es_cmsrep_apache.py while the logs read
# are the non-SSL ones; confirm the pattern matches this script's own filename.
# NOTE(review): under Python 2 without `from __future__ import print_function`
# the final print(...) would output a tuple -- confirm which applies.
if len(items) < 8: return True if items[2].endswith('-dev'): dev = 1 repo, arch = items[4], items[5] elif items[1] == "cmssw": repo, arch = items[2], items[4] else: return True from _py2with3compatibility import unquote xpayload = { 'dev': dev, 'repository': unquote(repo), 'architecture': unquote(arch), 'package': unquote(pkg).split("-1-", 1)[0], 'cmspkg': unquote(cmspkg) } for x in ["@timestamp", "ip"]: xpayload[x] = payload[x] return send_payload("cmspkg-access-" + week, "rpm-packages", id, dumps(xpayload)) count = run_cmd("pgrep -l -x -f '^python .*/es_cmsrep_apache.py$' | wc -l", False) if int(count) > 1: exit(0) logs = run_cmd( "ls -rt /var/log/httpd/cmsrep-non-ssl_access.log* | grep -v '[.]gz$'" ).split("\n") log = logwatch("httpd", log_dir="/data/es") s, c = log.process(logs, process) print("Total entries processed", c)