def impact_analysis(ip, date):
    """Load the impact-analysis ("stats") JSON for *ip* on *date* from HDFS.

    Returns the parsed document, or {} when no stats file has been
    generated for that IP/day.
    """
    safe_ip = ip.replace(".", "_")
    file_name = "stats-{0}.json".format(safe_ip)
    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}".format(
        Configuration.spot(), date.year, date.month, date.day, safe_ip)
    if not HDFSClient.file_exists(hdfs_path, file_name):
        return {}
    return json.loads(
        HDFSClient.get_file("{0}/{1}".format(hdfs_path, file_name)))
def impact_analysis(ip,date):
    # Read the pre-computed impact-analysis stats JSON for this IP/date
    # from the flow storyboard folder in HDFS; returns {} when absent.
    # NOTE(review): this name is defined more than once in this view --
    # at import time the later definition shadows the earlier one; confirm
    # which copy is intended.
    app_path = Configuration.spot()
    file_name = "stats-{0}.json".format(ip.replace(".","_"))
    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
                .format(app_path,date.year,date.month,date.day,ip.replace(".","_"))
    if HDFSClient.file_exists(hdfs_path,file_name):
        return json.loads(HDFSClient.get_file("{0}/{1}" \
                .format(hdfs_path,file_name)))
    else:
        return {}
def incident_progression(ip, date):
    """Fetch the threat-dendro (incident progression) JSON for *ip*/*date*.

    Returns the parsed document, or {} when it has not been generated.
    """
    safe_ip = ip.replace(".", "_")
    folder = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}".format(
        Configuration.spot(), date.year, date.month, date.day, safe_ip)
    name = "threat-dendro-{0}.json".format(safe_ip)
    if not HDFSClient.file_exists(folder, name):
        return {}
    return json.loads(HDFSClient.get_file("{0}/{1}".format(folder, name)))
def create_incident_progression(anchor, requests, referers, date):
    """Write the proxy incident-progression JSON for *anchor* to HDFS.

    The document carries the full URI, its requests, and the keys of the
    *referers* mapping. Returns the success message when the file is
    stored, False otherwise.

    Bug fix: the original assigned the success message to ``response``
    and then fell off the end, returning None -- which is falsy and
    therefore indistinguishable from the failure path's ``False``.
    """
    hash_name = md5.new(str(anchor)).hexdigest()
    file_name = "incident-progression-{0}.json".format(hash_name)
    app_path = Configuration.spot()
    hdfs_path = "{0}/proxy/oa/storyboard/{1}/{2}/{3}"\
        .format(app_path, date.year, date.month, date.day)
    data = {'fulluri': anchor, 'requests': requests,
            'referer_for': referers.keys()}
    if HDFSClient.put_file_json(data, hdfs_path, file_name,
                                overwrite_file=True):
        # Return the status instead of implicitly returning None.
        return "Incident progression successfuly created"
    else:
        return False
def incident_progression(ip,date):
    # Load the threat-dendro (incident progression) JSON generated for
    # this IP on the given date; {} when the file is missing.
    # NOTE(review): duplicate definition of incident_progression in this
    # view -- the later one shadows the earlier at import time.
    app_path = Configuration.spot()
    file_name = "threat-dendro-{0}.json".format(ip.replace(".","_"))
    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
                .format(app_path,date.year,date.month,date.day,ip.replace(".","_"))
    if HDFSClient.file_exists(hdfs_path,file_name):
        return json.loads(HDFSClient.get_file("{0}/{1}" \
                .format(hdfs_path,file_name)))
    else:
        return {}
def incident_progression(date, uri):
    """Return the stored incident-progression JSON for *uri* on *date*.

    The file name embeds the md5 of the URI; {} is returned when the
    document does not exist in HDFS.
    """
    folder = "{0}/proxy/oa/storyboard/{1}/{2}/{3}".format(
        Configuration.spot(), date.year, date.month, date.day)
    name = "incident-progression-{0}.json".format(
        md5.new(str(uri)).hexdigest())
    if not HDFSClient.file_exists(folder, name):
        return {}
    return json.loads(HDFSClient.get_file("{0}/{1}".format(folder, name)))
def incident_progression(date, uri):
    # Fetch the proxy incident-progression JSON whose file name embeds the
    # md5 of *uri*; returns {} when the document has not been generated.
    # NOTE(review): duplicate of the definition on the previous line.
    app_path = Configuration.spot()
    hdfs_path = "{0}/proxy/oa/storyboard/{1}/{2}/{3}".format(app_path,\
        date.year,date.month,date.day)
    hash_name = md5.new(str(uri)).hexdigest()
    file_name = "incident-progression-{0}.json".format(hash_name)
    if HDFSClient.file_exists(hdfs_path, file_name):
        return json.loads(HDFSClient.get_file("{0}/{1}"\
            .format(hdfs_path,file_name)))
    else:
        return {}
def save_comments(anchor, ip, query, title, text, date):
    # Persist a storyboard comment for a DNS threat: read the day's
    # current storyboard rows, update the entry matching *anchor* (or
    # append a new one), then rebuild the whole partition -- Impala
    # cannot update rows in place. Returns True.
    db = Configuration.db()
    sb_query = ("""
            SELECT ip_threat,dns_threat,title,text
            FROM {0}.dns_storyboard
            WHERE y = {1} AND m= {2} AND d={3}
            """).format(db, date.year, date.month, date.day)
    sb_data = ImpalaEngine.execute_query_as_list(sb_query)
    # find value if already exists.
    saved = False
    for item in sb_data:
        if item["ip_threat"] == anchor or item["dns_threat"] == anchor:
            item["title"] = title
            item["text"] = text
            saved = True
    if not saved:
        sb_data.append({'text': text, 'ip_threat': str(ip), 'title': title,
                        'dns_threat': query})
    #remove old file.
    app_path = Configuration.spot()
    old_file = "{0}/dns/hive/oa/storyboard/y={1}/m={2}/d={3}/"\
        .format(app_path,date.year,date.month,date.day)
    # Delete the partition's files manually, then refresh Impala metadata
    # before re-inserting every row (kept order matters: delete first).
    HDFSClient.delete_folder(old_file, "impala")
    ImpalaEngine.execute_query("invalidate metadata")
    # NOTE(review): title/text are interpolated into SQL unescaped --
    # quotes in the comment will break the INSERT (and this is injectable).
    for item in sb_data:
        insert_query = ("""
            INSERT INTO {0}.dns_storyboard PARTITION(y={1} , m={2} ,d={3})
            VALUES ( '{4}', '{5}', '{6}','{7}')
            """)\
            .format(db,date.year,date.month,date.day,\
            item["ip_threat"],item["dns_threat"],item["title"],item["text"])
        ImpalaEngine.execute_query(insert_query)
    return True
def save_comment(ip,title,text,date):
    # Persist a storyboard comment for a flow threat: load the day's rows,
    # update the entry for *ip* (or append a new one), then rebuild the
    # whole partition -- Impala cannot update rows in place. Returns True.
    #Get current table info.
    db = Configuration.db()
    sb_query = ("""
            SELECT ip_threat,title,text
            FROM {0}.flow_storyboard
            WHERE y = {1} AND m= {2} AND d={3}
            """).format(db,date.year,date.month,date.day)
    sb_data = ImpalaEngine.execute_query_as_list(sb_query)
    # find value if already exists.
    saved = False
    for item in sb_data:
        if item["ip_threat"] == ip:
            item["title"] = title
            item["text"] = text
            saved = True
    if not saved:
        sb_data.append({'text': text, 'ip_threat': str(ip), 'title': title})
    #remove old file.
    app_path = Configuration.spot()
    old_file = "{0}/flow/hive/oa/storyboard/y={1}/m={2}/d={3}/" \
        .format(app_path,date.year,date.month,date.day)
    # remove file manually to allow the comments update.
    HDFSClient.delete_folder(old_file,"impala")
    ImpalaEngine.execute_query("invalidate metadata")
    # NOTE(review): title/text are interpolated into SQL unescaped --
    # quotes will break the INSERT (and this is injectable).
    for item in sb_data:
        insert_query = ("""
            INSERT INTO {0}.flow_storyboard PARTITION(y={1} , m={2} ,d={3})
            VALUES ( '{4}', '{5}','{6}')
            """) \
            .format(db,date.year,date.month,date.day, \
            item["ip_threat"],item["title"],item["text"])
        ImpalaEngine.execute_query(insert_query)
    return True
def reset_scored_connections(date):
    """Wipe the flow scoring artifacts (storyboard, threat investigation,
    timeline) for *date* and refresh Impala's metadata.

    Returns True on success, False when an HDFS error occurs.
    """
    subfolders = (
        "flow/hive/oa/storyboard",
        "flow/hive/oa/threat_investigation",
        "flow/hive/oa/timeline",
    )
    app_path = Configuration.spot()
    try:
        # remove parquet files manually to allow the comments update.
        for subfolder in subfolders:
            partition = "{0}/{1}/y={2}/m={3}/d={4}/".format(
                app_path, subfolder, date.year, date.month, date.day)
            HDFSClient.delete_folder(partition, "impala")
        ImpalaEngine.execute_query("invalidate metadata")
        return True
    except HdfsError:
        return False
def reset_scored_connections(date):
    # Wipe the proxy scoring artifacts for the given date and refresh
    # Impala metadata. Returns True on success, False on HdfsError.
    proxy_storyboard = "proxy/hive/oa/storyboard"
    # NOTE(review): the next two assignments look swapped (the
    # *threat_investigation* name holds a timeline path and vice versa),
    # and "dns_threat_dendro" is suspicious inside a *proxy* reset --
    # confirm the intended folders before relying on this.
    proxy_threat_investigation = "dns_threat_dendro/hive/oa/timeline"
    proxy_timeline = "proxy/hive/oa/threat_investigation"
    app_path = Configuration.spot()
    try:
        # remove parquet files manually to allow the comments update.
        HDFSClient.delete_folder("{0}/{1}/y={2}/m={3}/d={4}/".format( \
            app_path,proxy_storyboard,date.year,date.month,date.day) , "impala")
        HDFSClient.delete_folder("{0}/{1}/y={2}/m={3}/d={4}/".format( \
            app_path,proxy_threat_investigation,date.year,date.month,date.day), "impala")
        HDFSClient.delete_folder("{0}/{1}/y={2}/m={3}/d={4}/".format( \
            app_path,proxy_timeline,date.year,date.month,date.day), "impala")
        ImpalaEngine.execute_query("invalidate metadata")
        return True
    except HdfsError:
        return False
def create_incident_progression(anchor, requests, referers, date):
    """Store the proxy incident-progression JSON for *anchor* in HDFS.

    Returns the success message when the file is written, False otherwise.

    Bug fix: the success branch only assigned ``response`` and the
    function then implicitly returned None (falsy), so callers could not
    tell success from the ``False`` failure path.
    """
    hash_name = md5.new(str(anchor)).hexdigest()
    file_name = "incident-progression-{0}.json".format(hash_name)
    app_path = Configuration.spot()
    hdfs_path = "{0}/proxy/oa/storyboard/{1}/{2}/{3}"\
        .format(app_path, date.year, date.month, date.day)
    data = {
        'fulluri': anchor,
        'requests': requests,
        'referer_for': referers.keys()
    }
    if HDFSClient.put_file_json(data, hdfs_path, file_name,
                                overwrite_file=True):
        # Return the status instead of falling off the end.
        return "Incident progression successfuly created"
    else:
        return False
def create_map_view(ip, inbound, outbound, twoway,date,iploc):
    # Build the "globe" GeoJSON document for the Map View of *ip* and
    # store it in the flow storyboard folder on HDFS. Returns a status
    # message (empty string when no iploc file was found).
    iplist = ''
    globe_fpath = 'globe-' + ip.replace('.','_') + ".json"
    if os.path.isfile(iploc):
        # First CSV column parsed as uint32 network values (quotes stripped).
        iplist = np.loadtxt(iploc,dtype=np.uint32,delimiter=',',usecols={0},\
            converters={0: lambda s: np.uint32(s.replace('"',''))})
    else:
        print "No iploc.csv file was found, Map View map won't be created"
    response = ""
    # NOTE(review): after loadtxt runs, iplist is a numpy array and the
    # comparison against '' is not a plain boolean on newer numpy --
    # verify behaviour against the deployed numpy version.
    if iplist != '':
        globe_json = {}
        globe_json['type'] = "FeatureCollection"
        globe_json['sourceips'] = []
        globe_json['destips'] = []
        # 'geo' rows: indices 6/7 are read as latitude/longitude and 8 as
        # a location label -- inferred from the indexing below; confirm
        # against the code that fills the 'geo' entries.
        for srcip in twoway:
            try:
                row = twoway[srcip]['geo']
                globe_json['destips'].append({
                    'type': 'Feature',
                    'properties': {
                        'location':row[8],
                        'ip':srcip,
                        'type':1
                    },
                    'geometry': {
                        'type': 'Point',
                        'coordinates': [float(row[7]), float(row[6])]
                    }
                })
            except ValueError:
                # Skip entries whose coordinates are not numeric.
                pass
        for dstip in outbound:
            try:
                row = outbound[dstip]['geo']
                dst_geo = outbound[dstip]['geo_dst']
                globe_json['sourceips'].append({
                    'type': 'Feature',
                    'properties': {
                        'location':row[8],
                        'ip':dstip,
                        'type':3
                    },
                    'geometry': {
                        'type': 'Point',
                        'coordinates': [float(row[7]), float(row[6])]
                    }
                })
                globe_json['destips'].append({
                    'type': 'Feature',
                    'properties': {
                        'location':row[8],
                        'ip':outbound[dstip]['dst_ip'],
                        'type':3
                    },
                    'geometry': {
                        'type': 'Point',
                        'coordinates': [float(dst_geo[7]), float(dst_geo[6])]
                    }
                })
            except ValueError:
                pass
        for dstip in inbound:
            try:
                row = inbound[dstip]['geo']
                dst_geo = inbound[dstip]['geo_src']
                globe_json['sourceips'].append({
                    'type': 'Feature',
                    'properties': {
                        'location':row[8],
                        'ip':dstip,
                        'type':2
                    },
                    'geometry': {
                        'type': 'Point',
                        'coordinates': [float(row[7]), float(row[6])]
                    }
                })
                globe_json['destips'].append({
                    'type': 'Feature',
                    'properties': {
                        'location':row[8],
                        'ip':inbound[dstip]['src_ip'],
                        'type':2
                    },
                    'geometry': {
                        'type': 'Point',
                        'coordinates': [float(dst_geo[7]), float(dst_geo[6])]
                    }
                })
            except ValueError:
                pass
        # NOTE(review): json_str is built but never used afterwards.
        json_str = json.dumps(globe_json)
        app_path = Configuration.spot()
        hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
            .format(app_path,date.year,date.month,date.day,ip.replace(".","_"))
        if HDFSClient.put_file_json(globe_json,hdfs_path,globe_fpath,overwrite_file=True) :
            response = "Geolocation map successfully created \n"
        else:
            response = "The map can't be created without an iploc file \n"
    return response
def create_incident_progression(anchor, inbound, outbound, twoway, date):
    """Build and store the flow incident-progression ("threat dendro")
    JSON for *anchor* on *date*.

    inbound/outbound/twoway map ip -> details; a details dict may carry a
    'nwloc' sequence whose third element is the network context label.
    Returns a human-readable status string.

    Refactor: the three identical per-direction counting loops are
    collapsed into the private helper _add_dendro_section.
    """
    dendro_fpath = 'threat-dendro-' + anchor.replace('.', '_') + ".json"
    obj = {
        'name': anchor,
        'children': [],
        'time': ""
    }
    # One child node per direction, holding per-context impact counts.
    _add_dendro_section(obj["children"], 'Inbound Only', inbound)
    _add_dendro_section(obj["children"], 'Outbound Only', outbound)
    _add_dendro_section(obj["children"], 'two way', twoway)
    app_path = Configuration.spot()
    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
        .format(app_path, date.year, date.month, date.day,
                anchor.replace(".", "_"))
    if HDFSClient.put_file_json(obj, hdfs_path, dendro_fpath,
                                overwrite_file=True):
        return "Incident progression successfully created \n"
    else:
        return "Incident progression couldn't be created \n"


def _add_dendro_section(children, name, conns):
    # Append a {name, children, impact} node to *children* and fill its
    # children with one {name, impact} entry per network context
    # ('nwloc'[2]) seen in *conns*, where impact is the occurrence count.
    node = {'name': name, 'children': [], 'impact': 0}
    children.append(node)
    ctx_counts = {}
    for ip in conns:
        if 'nwloc' in conns[ip] and len(conns[ip]['nwloc']) > 0:
            ctx = conns[ip]['nwloc'][2]
            ctx_counts[ctx] = ctx_counts.get(ctx, 0) + 1
    for ctx in ctx_counts:
        node['children'].append({'name': ctx, 'impact': ctx_counts[ctx]})
def score_connection(score,date,src_ip=None,dst_ip=None,src_port=None,dst_port=None):
    # Attach a human score to matching flow connections: copy matching
    # rows from flow_scores into flow_threat_investigation with the score,
    # then append them to the day's ML feedback CSV on HDFS.
    # Returns False when no filter is given, True otherwise.
    if not src_ip and not dst_ip and not src_port and not dst_port:
        return False
    db = Configuration.db()
    # get connections to score
    connections_query = ("""
            SELECT tstart,srcip,dstip,sport,dport, ibyt,ipkt
            FROM {0}.flow_scores
            WHERE y = {1} AND m={2} AND d={3}
            """).format(db,date.year,date.month,date.day)
    # NOTE(review): filter values are formatted straight into the SQL
    # text -- injectable if the IPs/ports come from an untrusted source.
    connections_filter = ""
    connections_filter += " AND srcip = '{0}'".format(src_ip) if src_ip else ""
    connections_filter += " AND dstip = '{0}'".format(dst_ip) if dst_ip else ""
    connections_filter += " AND sport = {0}" \
        .format(str(src_port)) if src_port else ""
    connections_filter += " AND dport = {0}" \
        .format(str(dst_port)) if dst_port else ""
    connections = ImpalaEngine.execute_query(connections_query + connections_filter)
    # add score to connections
    insert_command = ("""
        INSERT INTO {0}.flow_threat_investigation
        PARTITION (y={1},m={2},d={3})
        VALUES (""") \
        .format(db,date.year,date.month,date.day)
    fb_data = []
    first = True
    num_rows = 0
    for row in connections:
        # insert into flow_threat_investigation.
        threat_data = (row[0],row[1],row[2],row[3],row[4],score)
        fb_data.append([score,row[0],row[1],row[2],row[3],row[4],row[5],row[6]])
        insert_command += "{0}{1}".format("," if not first else "", threat_data)
        first = False
        num_rows += 1
    insert_command += ")"
    if num_rows > 0:
        ImpalaEngine.execute_query(insert_command)
    # create feedback file.
    app_path = Configuration.spot()
    feedback_path = "{0}/flow/scored_results/{1}{2}{3}/feedback" \
        .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))
    # First write of the day prepends a header row and overwrites;
    # subsequent writes append.
    append_file = True
    if len(HDFSClient.list_dir(feedback_path)) == 0:
        fb_data.insert(0,["sev","tstart","sip","dip","sport","dport","ipkt","ibyt"])
        append_file = False
    HDFSClient.put_file_csv(fb_data,feedback_path,"ml_feedback.csv",\
        append_file=append_file)
    return True
def score_request(date,score,uri):
    # Attach a human score to a proxy request: copy the matching rows from
    # proxy_scores into proxy_threat_investigation with the score, then
    # append them to the day's ML feedback CSV on HDFS.
    # Returns None when neither score nor uri is given, True otherwise.
    if not score and not uri:
        return None
    db = Configuration.db()
    # NOTE(review): uri is formatted straight into the SQL (injectable).
    p_query = ("""
            SELECT tdate,time,clientip,host,reqmethod,useragent,resconttype
            ,duration,username,webcat,referer,respcode,uriport
            ,uripath,uriquery,serverip,scbytes,csbytes,fulluri
            ,word,ml_score,uri_rep,respcode_name,network_context
            FROM {0}.proxy_scores
            WHERE y={1} and m={2} and d={3}
            AND fulluri = '{4}'
            """).format(db,date.year,date.month,date.day,uri)
    connections = ImpalaEngine.execute_query(p_query)
    # add score to connections
    insert_command = ("""
        INSERT INTO {0}.proxy_threat_investigation
        PARTITION (y={1},m={2},d={3})
        VALUES (""") \
        .format(db,date.year,date.month,date.day)
    fb_data = []
    first = True
    num_rows = 0
    for row in connections:
        cip_index = row[2]
        uri_index = row[18]
        # NOTE(review): tme_index reads row[2] (clientip) yet is split on
        # ':' below as if it were the time column (row[1]) -- confirm the
        # intended column before changing.
        tme_index = row[2]
        hash_field = [str( md5.new(str(cip_index) + str(uri_index)).hexdigest() \
            + str((tme_index.split(":"))[0]) )]
        threat_data = (row[0],row[18],score)
        fb_data.append([row[0],row[1],row[2],row[3],row[4],row[5],row[6],row[7] \
            ,row[8],row[9],row[10],row[11],row[12],row[13],row[14],row[15] \
            ,row[16],row[17],row[18],row[19],score,row[20],row[21],row[22], \
            row[23],hash_field])
        insert_command += "{0}{1}".format("," if not first else "", threat_data)
        first = False
        num_rows += 1
    insert_command += ")"
    if num_rows > 0:
        ImpalaEngine.execute_query(insert_command)
    # create feedback file.
    app_path = Configuration.spot()
    feedback_path = "{0}/proxy/scored_results/{1}{2}{3}/feedback"\
        .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))
    # First write of the day prepends a header row and overwrites;
    # subsequent writes append.
    ap_file = True
    if len(HDFSClient.list_dir(feedback_path)) == 0:
        fb_data.insert(0,["p_date","p_time","clientip","host","reqmethod",\
            "useragent","resconttype","duration","username","webcat","referer",\
            "respcode","uriport","uripath","uriquery","serverip","scbytes","csbytes",\
            "fulluri","word","score","uri_rep","uri_sev","respcode_name",\
            "network_context","hash"])
        ap_file = False
    HDFSClient.put_file_csv(fb_data,feedback_path,"ml_feedback.csv",append_file=ap_file)
    return True
def score_connection(date, ip="", dns="", ip_sev=0, dns_sev=0):
    # Attach human severities to matching DNS connections: copy matching
    # rows from dns_scores into dns_threat_investigation (with per-row
    # ip_sev/dns_sev applied only where the value matched), then append
    # them to the day's ML feedback CSV on HDFS.
    # Returns False when no target was given, True otherwise.
    if (not ip and not ip_sev) and (not dns and not dns_sev):
        return False
    db = Configuration.db()
    # NOTE(review): ip/dns are formatted straight into SQL (injectable).
    sq_query = ("""
        SELECT
            frame_time,unix_tstamp,frame_len,ip_dst,dns_qry_name,dns_qry_class,
            dns_qry_type,dns_qry_rcode,ml_score,tld,query_rep,
            hh,dns_qry_class_name,dns_qry_type_name,dns_qry_rcode_name,
            network_context
        FROM {0}.dns_scores
        WHERE y={1} and m={2} and d={3} AND (
        """).format(db, date.year, date.month, date.day)
    connections_filter = ""
    connections_filter += "ip_dst = '{0}' ".format(ip) if ip else ""
    connections_filter += " OR " if ip and dns else ""
    connections_filter += "dns_qry_name = '{0}' ".format(dns) if dns else ""
    connections_filter += ")"
    connections = ImpalaEngine.execute_query(sq_query + connections_filter)
    # add score to connections
    insert_command = ("""INSERT INTO {0}.dns_threat_investigation
        PARTITION (y={1},m={2},d={3})
        VALUES (""") \
        .format(db,date.year,date.month,date.day)
    fb_data = []
    first = True
    num_rows = 0
    for row in connections:
        # insert into dns_threat_investigation.
        # Severity applies only to the rows that actually matched the
        # given ip / dns value; others get 0.
        threat_data = (row[1],row[3],row[4],ip_sev if ip == row[3] else 0,\
            dns_sev if dns == row[4] else 0)
        fb_data.append([row[0],row[2],row[3],row[4],row[5],row[6],row[7],\
            row[8],row[9],row[10],row[11],ip_sev,dns_sev,row[12],row[13],row[14],\
            row[15],row[1]])
        insert_command += "{0}{1}".format("," if not first else "", threat_data)
        first = False
        num_rows += 1
    insert_command += ")"
    if num_rows > 0:
        ImpalaEngine.execute_query(insert_command)
    # create feedback file.
    app_path = Configuration.spot()
    feedback_path = "{0}/dns/scored_results/{1}{2}{3}/feedback"\
        .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))
    # First write of the day prepends a header row and overwrites;
    # subsequent writes append.
    ap_file = True
    if len(HDFSClient.list_dir(feedback_path)) == 0:
        fb_data.insert(0,["frame_time","frame_len","ip_dst","dns_qry_name",\
            "dns_qry_class","dns_qry_type","dns_qry_rcode","score","tld","query_rep",\
            "hh","ip_sev","dns_sev","dns_qry_class_name","dns_qry_type_name",\
            "dns_qry_rcode_name","network_context","unix_tstamp"])
        ap_file = False
    HDFSClient.put_file_csv(fb_data, feedback_path, "ml_feedback.csv",
                            append_file=ap_file)
    return True
def create_timeline(anchor,clientips,date,top_results):
    """Rebuild the proxy timeline partition for *anchor* on *date*.

    Keeps the existing rows of other threats, then inserts an aggregated
    timeline for the top *top_results* suspicious client IPs of *anchor*.
    Returns a status message.

    Bug fix: ``response`` was computed but never returned -- the function
    implicitly returned None on every path.
    """
    response = ""
    susp_ips = []
    if clientips:
        # Highest-count client IPs first.
        srtlist = sorted(list(clientips.items()), key=lambda x: x[1],
                         reverse=True)
        for val in srtlist[:top_results]:
            susp_ips.append(val[0])
    if anchor != "":
        db = Configuration.db()
        # Rows belonging to *other* threats are read out now and
        # re-inserted after the partition is wiped below.
        time_line_query = ("""
                SELECT p_threat,tstart,tend,duration,clientip,respcode,respcodename
                FROM {0}.proxy_timeline
                WHERE y={1} AND m={2} AND d={3} AND p_threat != '{4}'
                """).format(db,date.year,date.month,date.day,anchor.replace("'","//'"))
        tmp_timeline_data = ImpalaEngine.execute_query_as_list(time_line_query)
        imp_query = ("""
                INSERT INTO TABLE {0}.proxy_timeline
                PARTITION (y={2}, m={3},d={4})
                SELECT '{7}' as p_threat, concat(cast(p_date as string),
                ' ', cast(MIN(p_time) as string)) AS tstart,
                concat(cast(p_date as string), ' ',
                cast(MAX(p_time) as string)) AS tend,
                SUM(duration) AS duration, clientip,
                respcode,"respCodeName" as respCodeName
                FROM {0}.proxy
                WHERE fulluri='{1}' AND clientip IN ({5})
                AND y='{2}' AND m='{3}' AND d='{4}'
                GROUP BY clientip, p_time, respcode, p_date
                LIMIT {6}
                """)\
                .format(db,anchor,date.year,str(date.month).zfill(2),\
                str(date.day).zfill(2),("'" + "','".join(susp_ips) + "'")\
                ,top_results,anchor)
        app_path = Configuration.spot()
        old_file = "{0}/proxy/hive/oa/timeline/y={1}/m={2}/d={3}"\
            .format(app_path,date.year,date.month,date.day)
        # Impala cannot update in place: wipe the partition, refresh
        # metadata, re-insert the kept rows, then run the aggregation.
        HDFSClient.delete_folder(old_file,"impala")
        ImpalaEngine.execute_query("invalidate metadata")
        #Insert temporary values
        for item in tmp_timeline_data:
            insert_query = ("""
                        INSERT INTO {0}.proxy_timeline
                        PARTITION(y={1} , m={2} ,d={3})
                        VALUES ('{4}', '{5}', '{6}',{7},'{8}','{9}','{10}')
                        """)\
                        .format(db,date.year,date.month,date.day,\
                        item["p_threat"],item["tstart"],item["tend"],item["duration"],item["clientip"],item["respcode"],item["respcodename"])
            ImpalaEngine.execute_query(insert_query)
        ImpalaEngine.execute_query(imp_query)
        response = "Timeline successfully saved"
    else:
        response = "Timeline couldn't be created"
    # Bug fix: report the outcome to the caller.
    return response
def create_impact_analysis(anchor, inbound, outbound, twoway, threat_name,date):
    """Build the impact-analysis ("stats") JSON for *anchor* and store it
    in the flow storyboard folder on HDFS.

    The document has one child per direction (inbound/outbound/two way),
    each holding per-network-context sizes; get_ctx_name maps the raw
    context prefix to a display name. Returns a status string.

    Fix: removed the dead local ``data = json.dumps(obj)`` -- the
    serialized string was never used; put_file_json takes the dict.
    """
    stats_fpath = 'stats-' + anchor.replace('.', '_') + ".json"
    obj = {
        'name': threat_name,
        'children': [],
        'size': len(inbound) + len(outbound) + len(twoway)
    }
    #----- Add Inbound Connections-------#
    obj["children"].append({'name': 'Inbound Only', 'children': [],
                            'size': len(inbound)})
    in_ctxs = {}
    for ip in inbound:
        full_ctx = ''
        if 'nwloc' in inbound[ip] and len(inbound[ip]['nwloc']) > 0:
            full_ctx = inbound[ip]['nwloc'][2].split('.')[0]
        ctx = get_ctx_name(full_ctx)
        if ctx not in in_ctxs:
            in_ctxs[ctx] = 1
        else:
            in_ctxs[ctx] += 1
    for ctx in in_ctxs:
        obj["children"][0]['children'].append({
            'name': ctx,
            'size': in_ctxs[ctx]
        })
    #------ Add Outbound ----------------#
    obj["children"].append({'name': 'Outbound Only', 'children': [],
                            'size': len(outbound)})
    out_ctxs = {}
    for ip in outbound:
        full_ctx = ''
        if 'nwloc' in outbound[ip] and len(outbound[ip]['nwloc']) > 0:
            full_ctx = outbound[ip]['nwloc'][2].split('.')[0]
        ctx = get_ctx_name(full_ctx)
        if ctx not in out_ctxs:
            out_ctxs[ctx] = 1
        else:
            out_ctxs[ctx] += 1
    for ctx in out_ctxs:
        obj["children"][1]['children'].append({
            'name': ctx,
            'size': out_ctxs[ctx]
        })
    #------ Add Twoway ----------------#
    obj["children"].append({'name': 'two way', 'children': [],
                            'size': len(twoway)})
    tw_ctxs = {}
    for ip in twoway:
        full_ctx = ''
        if 'nwloc' in twoway[ip] and len(twoway[ip]['nwloc']) > 0:
            full_ctx = twoway[ip]['nwloc'][2].split('.')[0]
        ctx = get_ctx_name(full_ctx)
        if ctx not in tw_ctxs:
            tw_ctxs[ctx] = 1
        else:
            tw_ctxs[ctx] += 1
    for ctx in tw_ctxs:
        obj["children"][2]['children'].append({
            'name': ctx,
            'size': tw_ctxs[ctx]
        })
    app_path = Configuration.spot()
    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
        .format(app_path, date.year, date.month, date.day,
                anchor.replace(".", "_"))
    if HDFSClient.put_file_json(obj, hdfs_path, stats_fpath,
                                overwrite_file=True):
        return "Stats file successfully created \n"
    else:
        return "Stats file couldn't be created \n"
def score_connection(score,date,src_ip=None,dst_ip=None,src_port=None,dst_port=None):
    # Score matching flow connections: copy matching flow_scores rows into
    # flow_threat_investigation with the given score, then append them to
    # the day's ML feedback CSV on HDFS. Returns False when no filter is
    # given, True otherwise.
    # NOTE(review): duplicate of an earlier score_connection definition.
    if not src_ip and not dst_ip and not src_port and not dst_port:
        return False
    db = Configuration.db()
    # get connections to score
    connections_query = ("""
            SELECT tstart,srcip,dstip,sport,dport, ibyt,ipkt
            FROM {0}.flow_scores
            WHERE y = {1} AND m={2} AND d={3}
            """).format(db,date.year,date.month,date.day)
    # NOTE(review): filter values are interpolated into SQL (injectable).
    connections_filter = ""
    connections_filter += " AND srcip = '{0}'".format(src_ip) if src_ip else ""
    connections_filter += " AND dstip = '{0}'".format(dst_ip) if dst_ip else ""
    connections_filter += " AND sport = {0}" \
        .format(str(src_port)) if src_port else ""
    connections_filter += " AND dport = {0}" \
        .format(str(dst_port)) if dst_port else ""
    connections = ImpalaEngine.execute_query(connections_query + connections_filter)
    # add score to connections
    insert_command = ("""
        INSERT INTO {0}.flow_threat_investigation
        PARTITION (y={1},m={2},d={3})
        VALUES (""") \
        .format(db,date.year,date.month,date.day)
    fb_data = []
    first = True
    num_rows = 0
    for row in connections:
        # insert into flow_threat_investigation.
        threat_data = (row[0],row[1],row[2],row[3],row[4],score)
        fb_data.append([score,row[0],row[1],row[2],row[3],row[4],row[5],row[6]])
        insert_command += "{0}{1}".format("," if not first else "", threat_data)
        first = False
        num_rows += 1
    insert_command += ")"
    if num_rows > 0:
        ImpalaEngine.execute_query(insert_command)
    # create feedback file.
    app_path = Configuration.spot()
    feedback_path = "{0}/flow/scored_results/{1}{2}{3}/feedback" \
        .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))
    # Header row + overwrite on the first write of the day; append later.
    append_file = True
    if len(HDFSClient.list_dir(feedback_path)) == 0:
        fb_data.insert(0,["sev","tstart","sip","dip","sport","dport","ipkt","ibyt"])
        append_file = False
    HDFSClient.put_file_csv(fb_data,feedback_path,"ml_feedback.csv",\
        append_file=append_file)
    return True
def create_timeline(anchor, clientips, date, top_results):
    """Rebuild the proxy timeline partition for *anchor* on *date*.

    Preserves timeline rows of other threats, inserts an aggregated
    timeline for the top *top_results* suspicious client IPs, and returns
    a status message.

    Bug fix: the status in ``response`` was never returned (the function
    implicitly returned None on every path).
    """
    response = ""
    susp_ips = []
    if clientips:
        # Most active client IPs first.
        srtlist = sorted(list(clientips.items()), key=lambda x: x[1],
                         reverse=True)
        for val in srtlist[:top_results]:
            susp_ips.append(val[0])
    if anchor != "":
        db = Configuration.db()
        # Keep other threats' rows; they are re-inserted after the wipe.
        time_line_query = ("""
                SELECT p_threat,tstart,tend,duration,clientip,respcode,respcodename
                FROM {0}.proxy_timeline
                WHERE y={1} AND m={2} AND d={3} AND p_threat != '{4}'
                """).format(db, date.year, date.month, date.day, anchor.replace("'", "//'"))
        tmp_timeline_data = ImpalaEngine.execute_query_as_list(time_line_query)
        imp_query = ("""
                INSERT INTO TABLE {0}.proxy_timeline
                PARTITION (y={2}, m={3},d={4})
                SELECT '{7}' as p_threat, concat(cast(p_date as string),
                ' ', cast(MIN(p_time) as string)) AS tstart,
                concat(cast(p_date as string), ' ',
                cast(MAX(p_time) as string)) AS tend,
                SUM(duration) AS duration, clientip,
                respcode,"respCodeName" as respCodeName
                FROM {0}.proxy
                WHERE fulluri='{1}' AND clientip IN ({5})
                AND y='{2}' AND m='{3}' AND d='{4}'
                GROUP BY clientip, p_time, respcode, p_date
                LIMIT {6}
                """)\
                .format(db,anchor,date.year,str(date.month).zfill(2),\
                str(date.day).zfill(2),("'" + "','".join(susp_ips) + "'")\
                ,top_results,anchor)
        app_path = Configuration.spot()
        old_file = "{0}/proxy/hive/oa/timeline/y={1}/m={2}/d={3}"\
            .format(app_path,date.year,date.month,date.day)
        # Wipe + metadata refresh before re-inserting (Impala cannot
        # update in place).
        HDFSClient.delete_folder(old_file, "impala")
        ImpalaEngine.execute_query("invalidate metadata")
        #Insert temporary values
        for item in tmp_timeline_data:
            insert_query = ("""
                        INSERT INTO {0}.proxy_timeline
                        PARTITION(y={1} , m={2} ,d={3})
                        VALUES ('{4}', '{5}', '{6}',{7},'{8}','{9}','{10}')
                        """)\
                        .format(db,date.year,date.month,date.day,\
                        item["p_threat"],item["tstart"],item["tend"],item["duration"],item["clientip"],item["respcode"],item["respcodename"])
            ImpalaEngine.execute_query(insert_query)
        ImpalaEngine.execute_query(imp_query)
        response = "Timeline successfully saved"
    else:
        response = "Timeline couldn't be created"
    # Bug fix: report the outcome to the caller.
    return response
def create_incident_progression(anchor, inbound, outbound, twoway, date):
    """Build and store the flow incident-progression ("threat dendro")
    JSON for *anchor*, adding a section only for non-empty directions.

    Bug fix: the original appended the section nodes conditionally but
    then addressed them by the fixed indices obj["children"][0]/[1]/[2].
    With an empty inbound (or outbound) set, the later indices pointed at
    the wrong node or raised IndexError. Each section now writes through
    a direct reference to its own node.
    """
    dendro_fpath = 'threat-dendro-' + anchor.replace('.', '_') + ".json"
    obj = {
        'name': anchor,
        'children': [],
        'time': ""
    }
    #----- Add Inbound Connections-------#
    if len(inbound) > 0:
        in_node = {'name': 'Inbound Only', 'children': [], 'impact': 0}
        obj["children"].append(in_node)
        in_ctxs = {}
        for ip in inbound:
            if 'nwloc' in inbound[ip] and len(inbound[ip]['nwloc']) > 0:
                ctx = inbound[ip]['nwloc'][2]
                if ctx not in in_ctxs:
                    in_ctxs[ctx] = 1
                else:
                    in_ctxs[ctx] += 1
        for ctx in in_ctxs:
            in_node['children'].append({
                'name': ctx,
                'impact': in_ctxs[ctx]
            })
    #------ Add Outbound ----------------#
    if len(outbound) > 0:
        out_node = {'name': 'Outbound Only', 'children': [], 'impact': 0}
        obj["children"].append(out_node)
        out_ctxs = {}
        for ip in outbound:
            if 'nwloc' in outbound[ip] and len(outbound[ip]['nwloc']) > 0:
                ctx = outbound[ip]['nwloc'][2]
                if ctx not in out_ctxs:
                    out_ctxs[ctx] = 1
                else:
                    out_ctxs[ctx] += 1
        for ctx in out_ctxs:
            out_node['children'].append({
                'name': ctx,
                'impact': out_ctxs[ctx]
            })
    #------ Add TwoWay ----------------#
    if len(twoway) > 0:
        tw_node = {'name': 'two way', 'children': [], 'impact': 0}
        obj["children"].append(tw_node)
        tw_ctxs = {}
        for ip in twoway:
            if 'nwloc' in twoway[ip] and len(twoway[ip]['nwloc']) > 0:
                ctx = twoway[ip]['nwloc'][2]
                if ctx not in tw_ctxs:
                    tw_ctxs[ctx] = 1
                else:
                    tw_ctxs[ctx] += 1
        for ctx in tw_ctxs:
            tw_node['children'].append({
                'name': ctx,
                'impact': tw_ctxs[ctx]
            })
    app_path = Configuration.spot()
    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
        .format(app_path, date.year, date.month, date.day,
                anchor.replace(".", "_"))
    if HDFSClient.put_file_json(obj, hdfs_path, dendro_fpath,
                                overwrite_file=True):
        return "Incident progression successfully created \n"
    else:
        return "Incident progression couldn't be created \n"
def create_map_view(ip, inbound, outbound, twoway,date,iploc):
    # Build the "globe" GeoJSON for the Map View of *ip* and store it in
    # the flow storyboard folder on HDFS. Returns a status message (empty
    # string when no iploc file was found).
    # NOTE(review): duplicate of an earlier create_map_view definition.
    iplist = ''
    globe_fpath = 'globe-' + ip.replace('.','_') + ".json"
    if os.path.isfile(iploc):
        # First CSV column parsed as uint32 values (quotes stripped).
        iplist = np.loadtxt(iploc,dtype=np.uint32,delimiter=',',usecols={0},\
            converters={0: lambda s: np.uint32(s.replace('"',''))})
    else:
        print "No iploc.csv file was found, Map View map won't be created"
    response = ""
    # NOTE(review): once loadtxt ran, iplist is a numpy array and the
    # comparison against '' is not a plain boolean on newer numpy.
    if iplist != '':
        globe_json = {}
        globe_json['type'] = "FeatureCollection"
        globe_json['sourceips'] = []
        globe_json['destips'] = []
        # 'geo' rows: indices 6/7 read as lat/lon, 8 as a location label
        # -- inferred from the indexing here; confirm against the code
        # that fills the 'geo' entries.
        for srcip in twoway:
            try:
                row = twoway[srcip]['geo']
                globe_json['destips'].append({
                    'type': 'Feature',
                    'properties': {
                        'location':row[8],
                        'ip':srcip,
                        'type':1
                    },
                    'geometry': {
                        'type': 'Point',
                        'coordinates': [float(row[7]), float(row[6])]
                    }
                })
            except ValueError:
                # Skip entries with non-numeric coordinates.
                pass
        for dstip in outbound:
            try:
                row = outbound[dstip]['geo']
                dst_geo = outbound[dstip]['geo_dst']
                globe_json['sourceips'].append({
                    'type': 'Feature',
                    'properties': {
                        'location':row[8],
                        'ip':dstip,
                        'type':3
                    },
                    'geometry': {
                        'type': 'Point',
                        'coordinates': [float(row[7]), float(row[6])]
                    }
                })
                globe_json['destips'].append({
                    'type': 'Feature',
                    'properties': {
                        'location':row[8],
                        'ip':outbound[dstip]['dst_ip'],
                        'type':3
                    },
                    'geometry': {
                        'type': 'Point',
                        'coordinates': [float(dst_geo[7]), float(dst_geo[6])]
                    }
                })
            except ValueError:
                pass
        for dstip in inbound:
            try:
                row = inbound[dstip]['geo']
                dst_geo = inbound[dstip]['geo_src']
                globe_json['sourceips'].append({
                    'type': 'Feature',
                    'properties': {
                        'location':row[8],
                        'ip':dstip,
                        'type':2
                    },
                    'geometry': {
                        'type': 'Point',
                        'coordinates': [float(row[7]), float(row[6])]
                    }
                })
                globe_json['destips'].append({
                    'type': 'Feature',
                    'properties': {
                        'location':row[8],
                        'ip':inbound[dstip]['src_ip'],
                        'type':2
                    },
                    'geometry': {
                        'type': 'Point',
                        'coordinates': [float(dst_geo[7]), float(dst_geo[6])]
                    }
                })
            except ValueError:
                pass
        # NOTE(review): json_str is built but never used afterwards.
        json_str = json.dumps(globe_json)
        app_path = Configuration.spot()
        hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
            .format(app_path,date.year,date.month,date.day,ip.replace(".","_"))
        if HDFSClient.put_file_json(globe_json,hdfs_path,globe_fpath,overwrite_file=True) :
            response = "Geolocation map successfully created \n"
        else:
            response = "The map can't be created without an iploc file \n"
    return response
def create_impact_analysis(anchor, inbound, outbound, twoway, threat_name,date):
    """Build and store the impact-analysis ("stats") JSON for *anchor* in
    the flow storyboard folder on HDFS.

    One child per direction (inbound/outbound/two way), each with
    per-network-context sizes via get_ctx_name. Returns a status string.

    Fix: removed the dead local ``data = json.dumps(obj)`` -- the string
    was never used; put_file_json consumes the dict directly.
    """
    stats_fpath = 'stats-' + anchor.replace('.', '_') + ".json"
    obj = {
        'name': threat_name,
        'children': [],
        'size': len(inbound) + len(outbound) + len(twoway)
    }
    #----- Add Inbound Connections-------#
    obj["children"].append({'name': 'Inbound Only', 'children': [],
                            'size': len(inbound)})
    in_ctxs = {}
    for ip in inbound:
        full_ctx = ''
        if 'nwloc' in inbound[ip] and len(inbound[ip]['nwloc']) > 0:
            full_ctx = inbound[ip]['nwloc'][2].split('.')[0]
        ctx = get_ctx_name(full_ctx)
        if ctx not in in_ctxs:
            in_ctxs[ctx] = 1
        else:
            in_ctxs[ctx] += 1
    for ctx in in_ctxs:
        obj["children"][0]['children'].append({
            'name': ctx,
            'size': in_ctxs[ctx]
        })
    #------ Add Outbound ----------------#
    obj["children"].append({'name': 'Outbound Only', 'children': [],
                            'size': len(outbound)})
    out_ctxs = {}
    for ip in outbound:
        full_ctx = ''
        if 'nwloc' in outbound[ip] and len(outbound[ip]['nwloc']) > 0:
            full_ctx = outbound[ip]['nwloc'][2].split('.')[0]
        ctx = get_ctx_name(full_ctx)
        if ctx not in out_ctxs:
            out_ctxs[ctx] = 1
        else:
            out_ctxs[ctx] += 1
    for ctx in out_ctxs:
        obj["children"][1]['children'].append({
            'name': ctx,
            'size': out_ctxs[ctx]
        })
    #------ Add Twoway ----------------#
    obj["children"].append({'name': 'two way', 'children': [],
                            'size': len(twoway)})
    tw_ctxs = {}
    for ip in twoway:
        full_ctx = ''
        if 'nwloc' in twoway[ip] and len(twoway[ip]['nwloc']) > 0:
            full_ctx = twoway[ip]['nwloc'][2].split('.')[0]
        ctx = get_ctx_name(full_ctx)
        if ctx not in tw_ctxs:
            tw_ctxs[ctx] = 1
        else:
            tw_ctxs[ctx] += 1
    for ctx in tw_ctxs:
        obj["children"][2]['children'].append({
            'name': ctx,
            'size': tw_ctxs[ctx]
        })
    app_path = Configuration.spot()
    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
        .format(app_path, date.year, date.month, date.day,
                anchor.replace(".", "_"))
    if HDFSClient.put_file_json(obj, hdfs_path, stats_fpath,
                                overwrite_file=True):
        return "Stats file successfully created \n"
    else:
        return "Stats file couldn't be created \n"
def score_request(date, score, uri):
    # Score a proxy request: copy matching proxy_scores rows into
    # proxy_threat_investigation with the score, then append them to the
    # day's ML feedback CSV on HDFS. Returns None when neither score nor
    # uri is given, True otherwise.
    # NOTE(review): duplicate of an earlier score_request definition.
    if not score and not uri:
        return None
    db = Configuration.db()
    # NOTE(review): uri is formatted straight into the SQL (injectable).
    p_query = ("""
            SELECT tdate,time,clientip,host,reqmethod,useragent,resconttype
            ,duration,username,webcat,referer,respcode,uriport
            ,uripath,uriquery,serverip,scbytes,csbytes,fulluri
            ,word,ml_score,uri_rep,respcode_name,network_context
            FROM {0}.proxy_scores
            WHERE y={1} and m={2} and d={3}
            AND fulluri = '{4}'
            """).format(db, date.year, date.month, date.day, uri)
    connections = ImpalaEngine.execute_query(p_query)
    # add score to connections
    insert_command = ("""
        INSERT INTO {0}.proxy_threat_investigation
        PARTITION (y={1},m={2},d={3})
        VALUES (""") \
        .format(db,date.year,date.month,date.day)
    fb_data = []
    first = True
    num_rows = 0
    for row in connections:
        cip_index = row[2]
        uri_index = row[18]
        # NOTE(review): tme_index reads row[2] (clientip) yet is split on
        # ':' as if it were the time column (row[1]) -- confirm intent.
        tme_index = row[2]
        hash_field = [str( md5.new(str(cip_index) + str(uri_index)).hexdigest() \
            + str((tme_index.split(":"))[0]) )]
        threat_data = (row[0], row[18], score)
        fb_data.append([row[0],row[1],row[2],row[3],row[4],row[5],row[6],row[7] \
            ,row[8],row[9],row[10],row[11],row[12],row[13],row[14],row[15] \
            ,row[16],row[17],row[18],row[19],score,row[20],row[21],row[22], \
            row[23],hash_field])
        insert_command += "{0}{1}".format("," if not first else "", threat_data)
        first = False
        num_rows += 1
    insert_command += ")"
    if num_rows > 0:
        ImpalaEngine.execute_query(insert_command)
    # create feedback file.
    app_path = Configuration.spot()
    feedback_path = "{0}/proxy/scored_results/{1}{2}{3}/feedback"\
        .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))
    # Header row + overwrite on the first write of the day; append later.
    ap_file = True
    if len(HDFSClient.list_dir(feedback_path)) == 0:
        fb_data.insert(0,["p_date","p_time","clientip","host","reqmethod",\
            "useragent","resconttype","duration","username","webcat","referer",\
            "respcode","uriport","uripath","uriquery","serverip","scbytes","csbytes",\
            "fulluri","word","score","uri_rep","uri_sev","respcode_name",\
            "network_context","hash"])
        ap_file = False
    HDFSClient.put_file_csv(fb_data, feedback_path, "ml_feedback.csv",
                            append_file=ap_file)
    return True
def score_connection(date,ip="", dns="", ip_sev=0, dns_sev=0):
    # Score matching DNS connections: copy matching dns_scores rows into
    # dns_threat_investigation (severity applied per row only where the
    # ip/dns value matched), then append them to the day's ML feedback
    # CSV on HDFS. Returns False when no target is given, True otherwise.
    # NOTE(review): duplicate of an earlier dns score_connection.
    if (not ip and not ip_sev) and (not dns and not dns_sev):
        return False
    db = Configuration.db()
    # NOTE(review): ip/dns are formatted straight into SQL (injectable).
    sq_query = ("""
        SELECT
            frame_time,unix_tstamp,frame_len,ip_dst,dns_qry_name,dns_qry_class,
            dns_qry_type,dns_qry_rcode,ml_score,tld,query_rep,
            hh,dns_qry_class_name,dns_qry_type_name,dns_qry_rcode_name,
            network_context
        FROM {0}.dns_scores
        WHERE y={1} and m={2} and d={3} AND (
        """).format(db,date.year,date.month,date.day)
    connections_filter = ""
    connections_filter += "ip_dst = '{0}' ".format(ip) if ip else ""
    connections_filter += " OR " if ip and dns else ""
    connections_filter += "dns_qry_name = '{0}' ".format(dns) if dns else ""
    connections_filter += ")"
    connections = ImpalaEngine.execute_query(sq_query + connections_filter)
    # add score to connections
    insert_command = ("""INSERT INTO {0}.dns_threat_investigation
        PARTITION (y={1},m={2},d={3})
        VALUES (""") \
        .format(db,date.year,date.month,date.day)
    fb_data = []
    first = True
    num_rows = 0
    for row in connections:
        # insert into dns_threat_investigation.
        threat_data = (row[1],row[3],row[4],ip_sev if ip == row[3] else 0,\
            dns_sev if dns == row[4] else 0)
        fb_data.append([row[0],row[2],row[3],row[4],row[5],row[6],row[7],\
            row[8],row[9],row[10],row[11],ip_sev,dns_sev,row[12],row[13],row[14],\
            row[15],row[1]])
        insert_command += "{0}{1}".format("," if not first else "", threat_data)
        first = False
        num_rows += 1
    insert_command += ")"
    if num_rows > 0:
        ImpalaEngine.execute_query(insert_command)
    # create feedback file.
    app_path = Configuration.spot()
    feedback_path = "{0}/dns/scored_results/{1}{2}{3}/feedback"\
        .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))
    # Header row + overwrite on the first write of the day; append later.
    ap_file = True
    if len(HDFSClient.list_dir(feedback_path)) == 0:
        fb_data.insert(0,["frame_time","frame_len","ip_dst","dns_qry_name",\
            "dns_qry_class","dns_qry_type","dns_qry_rcode","score","tld","query_rep",\
            "hh","ip_sev","dns_sev","dns_qry_class_name","dns_qry_type_name",\
            "dns_qry_rcode_name","network_context","unix_tstamp"])
        ap_file = False
    HDFSClient.put_file_csv(fb_data,feedback_path,"ml_feedback.csv",append_file=ap_file)
    return True