示例#1
0
def impact_analysis(ip,date):
    """Fetch the impact-analysis stats JSON for *ip* on *date* from HDFS.

    Returns the parsed JSON document, or an empty dict when the stats
    file does not exist for that day.
    """
    safe_ip = ip.replace(".","_")
    file_name = "stats-{0}.json".format(safe_ip)
    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
        .format(Configuration.spot(),date.year,date.month,date.day,safe_ip)

    if not HDFSClient.file_exists(hdfs_path,file_name):
        return {}
    return json.loads(HDFSClient.get_file("{0}/{1}".format(hdfs_path,file_name)))
示例#2
0
def impact_analysis(ip,date):
    # Retrieve the per-IP impact-analysis stats file from the flow
    # storyboard directory; an empty dict signals "no data for this day".
    formatted_ip = ip.replace(".","_")
    base = Configuration.spot()
    file_name = "stats-{0}.json".format(formatted_ip)
    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
        .format(base,date.year,date.month,date.day,formatted_ip)

    if HDFSClient.file_exists(hdfs_path,file_name):
        raw = HDFSClient.get_file("{0}/{1}".format(hdfs_path,file_name))
        return json.loads(raw)
    return {}
示例#3
0
def incident_progression(ip,date):
    """Load the threat-dendro incident-progression JSON for *ip* on *date*.

    Returns the parsed document, or an empty dict when no file exists.
    """
    sanitized = ip.replace(".","_")
    file_name = "threat-dendro-{0}.json".format(sanitized)

    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
        .format(Configuration.spot(),date.year,date.month,date.day,sanitized)

    if not HDFSClient.file_exists(hdfs_path,file_name):
        return {}
    return json.loads(HDFSClient.get_file("{0}/{1}".format(hdfs_path,file_name)))
示例#4
0
def create_incident_progression(anchor,requests,referers,date):
    """Write the incident-progression JSON document for *anchor* to HDFS.

    Parameters: anchor (full URI of the threat, hashed into the file name),
    requests (request details stored as-is), referers (dict whose keys are
    the referring URIs), date (partition day for the HDFS path).
    Returns True when the file is stored, False otherwise.
    """
    hash_name = md5.new(str(anchor)).hexdigest()
    file_name = "incident-progression-{0}.json".format(hash_name)
    app_path = Configuration.spot()
    hdfs_path = "{0}/proxy/oa/storyboard/{1}/{2}/{3}"\
    .format(app_path,date.year,date.month,date.day)

    data = {'fulluri':anchor, 'requests':requests,'referer_for':referers.keys()}
    # BUG FIX: the original assigned a success message but fell through and
    # implicitly returned None, so the success path was falsy just like the
    # explicit `return False` failure path.  Return True on success.
    if HDFSClient.put_file_json(data,hdfs_path,file_name,overwrite_file=True):
        return True
    return False
示例#5
0
def incident_progression(ip,date):
    # Fetch the stored threat dendrogram for this IP/day; empty dict if absent.
    ip_key = ip.replace(".","_")
    file_name = "threat-dendro-{0}.json".format(ip_key)
    app_path = Configuration.spot()

    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
        .format(app_path,date.year,date.month,date.day,ip_key)

    if HDFSClient.file_exists(hdfs_path,file_name):
        content = HDFSClient.get_file("{0}/{1}".format(hdfs_path,file_name))
        return json.loads(content)
    return {}
示例#6
0
def incident_progression(date,uri):
    """Load the proxy incident-progression JSON for *uri* on *date*.

    The file name is keyed by the MD5 digest of the URI.  Returns the
    parsed document, or an empty dict when the file is missing.
    """
    base_path = Configuration.spot()
    hdfs_path = "{0}/proxy/oa/storyboard/{1}/{2}/{3}".format(
        base_path,date.year,date.month,date.day)

    digest = md5.new(str(uri)).hexdigest()
    file_name = "incident-progression-{0}.json".format(digest)

    if not HDFSClient.file_exists(hdfs_path,file_name):
        return {}
    return json.loads(HDFSClient.get_file("{0}/{1}".format(hdfs_path,file_name)))
示例#7
0
def incident_progression(date, uri):
    # Read back the incident-progression document previously written for
    # this URI/day; the filename embeds the URI's MD5 digest.
    root = Configuration.spot()
    hdfs_path = "{0}/proxy/oa/storyboard/{1}/{2}/{3}".format(
        root, date.year, date.month, date.day)

    uri_hash = md5.new(str(uri)).hexdigest()
    file_name = "incident-progression-{0}.json".format(uri_hash)

    if HDFSClient.file_exists(hdfs_path, file_name):
        payload = HDFSClient.get_file("{0}/{1}".format(hdfs_path,file_name))
        return json.loads(payload)
    return {}
示例#8
0
def save_comments(anchor, ip, query, title, text, date):
    """Create or update the storyboard comment for a DNS threat.

    Reads the dns_storyboard partition for *date*, updates the row whose
    ip_threat or dns_threat equals *anchor* (or appends a new row built
    from *ip*/*query*), then drops the old parquet folder and re-inserts
    every row.  Returns True.
    """

    db = Configuration.db()
    # NOTE(review): values are interpolated directly into the SQL text;
    # inputs must be trusted (no escaping/parameterization here).
    sb_query = ("""
            SELECT
                ip_threat,dns_threat,title,text
            FROM
                {0}.dns_storyboard
            WHERE
                y = {1} AND m= {2} AND d={3}
            """).format(db, date.year, date.month, date.day)
    sb_data = ImpalaEngine.execute_query_as_list(sb_query)

    # find value if already exists.
    saved = False
    for item in sb_data:
        if item["ip_threat"] == anchor or item["dns_threat"] == anchor:
            item["title"] = title
            item["text"] = text
            saved = True

    if not saved:
        sb_data.append({
            'text': text,
            'ip_threat': str(ip),
            'title': title,
            'dns_threat': query
        })

    #remove old file.
    app_path = Configuration.spot()
    old_file = "{0}/dns/hive/oa/storyboard/y={1}/m={2}/d={3}/"\
    .format(app_path,date.year,date.month,date.day)

    # The partition is rebuilt from scratch: delete the folder, refresh
    # Impala metadata, then re-insert every (possibly updated) row below.
    HDFSClient.delete_folder(old_file, "impala")
    ImpalaEngine.execute_query("invalidate metadata")

    for item in sb_data:
        insert_query = ("""
         	INSERT INTO {0}.dns_storyboard PARTITION(y={1} , m={2} ,d={3})
            	VALUES ( '{4}', '{5}', '{6}','{7}')
            	""")\
                       .format(db,date.year,date.month,date.day,\
                       item["ip_threat"],item["dns_threat"],item["title"],item["text"])
        ImpalaEngine.execute_query(insert_query)

    return True
示例#9
0
def save_comment(ip,title,text,date):
    """Create or update the storyboard comment for a flow threat.

    Reads the current flow_storyboard partition for *date*, updates the
    row whose ip_threat matches *ip* (or appends a new one), then drops
    the old parquet folder and re-inserts every row.  Returns True.
    """

    #Get current table info.
    db = Configuration.db()
    # NOTE(review): parameters are interpolated directly into the SQL text;
    # this assumes trusted input (no escaping is performed).
    sb_query = ("""
            SELECT
                ip_threat,title,text
            FROM
                {0}.flow_storyboard
            WHERE
                y = {1} AND m= {2} AND d={3}
            """).format(db,date.year,date.month,date.day)

    sb_data = ImpalaEngine.execute_query_as_list(sb_query)

    # find value if already exists.
    saved = False
    for item in sb_data:
        if item["ip_threat"] == ip:
            item["title"] = title
            item["text"] = text
            saved = True

    if not saved:
        sb_data.append({'text': text, 'ip_threat': str(ip), 'title': title})

    #remove old file.
    app_path = Configuration.spot()
    old_file = "{0}/flow/hive/oa/storyboard/y={1}/m={2}/d={3}/" \
    .format(app_path,date.year,date.month,date.day)

    # remove file manually to allow the comments update.
    HDFSClient.delete_folder(old_file,"impala")
    ImpalaEngine.execute_query("invalidate metadata")

    # BUG FIX: this insert loop mixed a tab-indented statement with the
    # file's 4-space indentation (a TabError under Python 3 and fragile
    # under Python 2).  Re-indented consistently; behavior is unchanged.
    for item in sb_data:
        insert_query = ("""
            INSERT INTO {0}.flow_storyboard PARTITION(y={1} , m={2} ,d={3})
            VALUES ( '{4}', '{5}','{6}')
            """) \
            .format(db,date.year,date.month,date.day, \
            item["ip_threat"],item["title"],item["text"])

        ImpalaEngine.execute_query(insert_query)
    return True
示例#10
0
def reset_scored_connections(date):
    """Delete the flow storyboard, threat-investigation and timeline
    partitions for *date* so the scored connections can be rebuilt.

    Returns True on success, False when HDFS deletion fails.
    """
    app_path = Configuration.spot()
    subdirs = (
        "flow/hive/oa/storyboard",
        "flow/hive/oa/threat_investigation",
        "flow/hive/oa/timeline",
    )

    try:
        # remove parquet files manually to allow the comments update.
        for subdir in subdirs:
            HDFSClient.delete_folder("{0}/{1}/y={2}/m={3}/d={4}/".format(
                app_path,subdir,date.year,date.month,date.day), "impala")
        ImpalaEngine.execute_query("invalidate metadata")
        return True

    except HdfsError:
        return False
示例#11
0
def reset_scored_connections(date):
    """Drop the proxy-side storyboard/investigation/timeline partitions for
    *date* so the scored connections can be rebuilt.

    Returns True on success, False when HDFS deletion fails.
    """

    proxy_storyboard = "proxy/hive/oa/storyboard"
    # NOTE(review): the next two assignments look swapped/mislabeled --
    # `proxy_threat_investigation` holds a dns_threat_dendro *timeline*
    # path and `proxy_timeline` holds the *threat_investigation* path.
    # All three folders are still deleted, but confirm these are the
    # intended locations before relying on the names.
    proxy_threat_investigation = "dns_threat_dendro/hive/oa/timeline"
    proxy_timeline = "proxy/hive/oa/threat_investigation"
    app_path = Configuration.spot()

    try:
        # remove parquet files manually to allow the comments update.
        HDFSClient.delete_folder("{0}/{1}/y={2}/m={3}/d={4}/".format( \
            app_path,proxy_storyboard,date.year,date.month,date.day) , "impala")
        HDFSClient.delete_folder("{0}/{1}/y={2}/m={3}/d={4}/".format( \
            app_path,proxy_threat_investigation,date.year,date.month,date.day), "impala")
        HDFSClient.delete_folder("{0}/{1}/y={2}/m={3}/d={4}/".format( \
            app_path,proxy_timeline,date.year,date.month,date.day), "impala")
        ImpalaEngine.execute_query("invalidate metadata")
        return True

    except HdfsError:
        return False
示例#12
0
def create_incident_progression(anchor, requests, referers, date):
    """Store the incident-progression JSON document for *anchor* in HDFS.

    Parameters: anchor (full URI of the threat, hashed into the file name),
    requests (request details stored as-is), referers (dict whose keys are
    the referring URIs), date (partition day for the HDFS path).
    Returns True when the file is stored, False otherwise.
    """
    hash_name = md5.new(str(anchor)).hexdigest()
    file_name = "incident-progression-{0}.json".format(hash_name)
    app_path = Configuration.spot()
    hdfs_path = "{0}/proxy/oa/storyboard/{1}/{2}/{3}"\
    .format(app_path,date.year,date.month,date.day)

    data = {
        'fulluri': anchor,
        'requests': requests,
        'referer_for': referers.keys()
    }
    # BUG FIX: on success the original only assigned a message and then
    # implicitly returned None (falsy), indistinguishable from the
    # `return False` failure path.  Return True explicitly.
    if HDFSClient.put_file_json(data,
                                hdfs_path,
                                file_name,
                                overwrite_file=True):
        return True
    return False
示例#13
0
def create_map_view(ip, inbound, outbound, twoway,date,iploc):
    """Build the 'globe' GeoJSON map for *ip* and store it in HDFS.

    *iploc* is the path to an iploc CSV file; when the file is missing the
    map is not built and an empty response string is returned.  Otherwise a
    FeatureCollection with 'sourceips' / 'destips' point features is
    assembled from the twoway, outbound and inbound connection dicts and
    written to the flow storyboard folder.  Returns a status message.
    """

    iplist = ''
    globe_fpath = 'globe-' + ip.replace('.','_') + ".json"
    if os.path.isfile(iploc):
        # Load only column 0 as uint32, stripping surrounding quotes.
        iplist = np.loadtxt(iploc,dtype=np.uint32,delimiter=',',usecols={0},\
        converters={0: lambda s: np.uint32(s.replace('"',''))})
    else:
        print "No iploc.csv file was found, Map View map won't be created"

    response = ""
    # NOTE(review): once loaded, iplist is a numpy array, so `iplist != ''`
    # relies on numpy's array-vs-string comparison semantics; confirm this
    # behaves as intended on the numpy version in use.
    if iplist != '':
        
        globe_json = {}
        globe_json['type'] = "FeatureCollection"
        globe_json['sourceips'] = []
        globe_json['destips'] = []
        # Two-way peers: one destination feature per source IP (type 1).
        # row[6]/row[7] are latitude/longitude fields of the 'geo' record;
        # rows that fail float() conversion are skipped.
        for srcip in twoway:
            try:
                row =  twoway[srcip]['geo']
                globe_json['destips'].append({
                        'type': 'Feature',
                        'properties': {
                            'location':row[8],
                            'ip':srcip,
                            'type':1
                        },
                        'geometry': {
                            'type': 'Point',
                            'coordinates': [float(row[7]), float(row[6])]
                        }
                    })
            except ValueError:
                pass
        # Outbound-only peers: a source feature plus a destination feature
        # (type 3), using the peer's 'geo_dst' coordinates for the latter.
        for dstip in outbound:
            try:
                row =  outbound[dstip]['geo']
                dst_geo = outbound[dstip]['geo_dst']
                globe_json['sourceips'].append({
                        'type': 'Feature',
                        'properties': {
                            'location':row[8],
                            'ip':dstip,
                            'type':3
                        },
                        'geometry': {
                            'type': 'Point',
                            'coordinates': [float(row[7]), float(row[6])]
                        }
                    })
                globe_json['destips'].append({
                        'type': 'Feature',
                        'properties': {
                            'location':row[8],
                            'ip':outbound[dstip]['dst_ip'],
                            'type':3
                        },
                        'geometry': {
                            'type': 'Point',
                            'coordinates': [float(dst_geo[7]), float(dst_geo[6])]
                        }
                    })
            except ValueError:
                pass
        # Inbound-only peers: same shape as outbound but type 2, using
        # 'geo_src' / 'src_ip' for the destination feature.
        for dstip in inbound:
            try:
                row =  inbound[dstip]['geo']
                dst_geo = inbound[dstip]['geo_src']
                globe_json['sourceips'].append({
                        'type': 'Feature',
                        'properties': {
                            'location':row[8],
                            'ip':dstip,
                            'type':2
                        },
                        'geometry': {
                            'type': 'Point',
                            'coordinates': [float(row[7]), float(row[6])]
                        }
                    })
                globe_json['destips'].append({
                        'type': 'Feature',
                        'properties': {
                            'location':row[8],
                            'ip':inbound[dstip]['src_ip'],
                            'type':2
                        },
                        'geometry': {
                            'type': 'Point',
                            'coordinates': [float(dst_geo[7]), float(dst_geo[6])]
                        }
                    })
            except ValueError:
                pass
        # NOTE(review): json_str is never used -- put_file_json receives the
        # dict itself; this serialization is dead work.
        json_str = json.dumps(globe_json)
        app_path = Configuration.spot()
        hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
        .format(app_path,date.year,date.month,date.day,ip.replace(".","_"))

        if HDFSClient.put_file_json(globe_json,hdfs_path,globe_fpath,overwrite_file=True) :
            response = "Geolocation map successfully created \n"
        else:
            response = "The map can't be created without an iploc file \n"

    return response
示例#14
0
def create_incident_progression(anchor, inbound, outbound, twoway, date):
    """Build and store the threat-dendro JSON for *anchor* on *date*.

    The document has one child per connection direction ('Inbound Only',
    'Outbound Only', 'two way'); each direction groups peer IPs by their
    network-location context and records how many peers fall in each.
    Returns a human-readable success/failure message.
    """
    dendro_fpath = 'threat-dendro-' + anchor.replace('.','_') + ".json"
    obj = {
        'name':anchor,
        'children': [],
        'time': ""
    }

    def _context_counts(conns):
        # Tally the third 'nwloc' field across all peers that carry one.
        counts = {}
        for peer in conns:
            if 'nwloc' in conns[peer] and len(conns[peer]['nwloc']) > 0:
                key = conns[peer]['nwloc'][2]
                counts[key] = counts.get(key, 0) + 1
        return counts

    sections = (('Inbound Only', inbound),
                ('Outbound Only', outbound),
                ('two way', twoway))
    for position, (label, conns) in enumerate(sections):
        obj["children"].append({'name': label, 'children': [], 'impact': 0})
        if len(conns) > 0:
            tallies = _context_counts(conns)
            for key in tallies:
                obj["children"][position]['children'].append({
                        'name': key,
                        'impact': tallies[key]
                    })

    app_path = Configuration.spot()
    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
    .format(app_path,date.year,date.month,date.day,anchor.replace(".","_"))

    if HDFSClient.put_file_json(obj,hdfs_path,dendro_fpath,overwrite_file=True):
        return "Incident progression successfully created \n"
    else:
        return "Incident progression couldn't be created \n"
示例#15
0
def score_connection(score,date,src_ip=None,dst_ip=None,src_port=None,dst_port=None):
    """Score the flow connections matching the given filters.

    Inserts the matching flow_scores rows (with *score*) into
    flow_threat_investigation and appends them to the ML feedback CSV.
    Returns True, or False when no filter was supplied at all.
    """

    if not src_ip and not dst_ip and not src_port and not dst_port:
        return False

    db = Configuration.db()
    # get connections to score
    # NOTE(review): filter values are interpolated directly into the SQL
    # text; inputs must be trusted (no escaping/parameterization here).
    connections_query = ("""
            SELECT
                tstart,srcip,dstip,sport,dport, ibyt,ipkt
            FROM {0}.flow_scores
            WHERE
                y = {1} AND m={2} AND d={3}
            """).format(db,date.year,date.month,date.day)

    # Each filter fragment is appended only when its argument was provided.
    connections_filter = ""
    connections_filter += " AND srcip = '{0}'".format(src_ip) if src_ip else ""
    connections_filter += " AND dstip = '{0}'".format(dst_ip) if dst_ip else ""

    connections_filter += " AND sport = {0}" \
    .format(str(src_port)) if src_port else ""

    connections_filter += " AND dport = {0}" \
    .format(str(dst_port)) if dst_port else ""
    connections = ImpalaEngine.execute_query(connections_query + connections_filter)


    # add score to connections
    # A single multi-row statement is assembled; each tuple's str() form
    # supplies the per-row parentheses and commas.
    insert_command = ("""
        INSERT INTO {0}.flow_threat_investigation
        PARTITION (y={1},m={2},d={3})
        VALUES (""") \
        .format(db,date.year,date.month,date.day)

    fb_data =  []
    first = True
    num_rows = 0
    for row in connections:
        # insert into flow_threat_investigation.
        threat_data = (row[0],row[1],row[2],row[3],row[4],score)
        fb_data.append([score,row[0],row[1],row[2],row[3],row[4],row[5],row[6]])
        insert_command += "{0}{1}".format("," if not first else "", threat_data)
        first = False
        num_rows += 1

    insert_command += ")"
    if num_rows > 0: ImpalaEngine.execute_query(insert_command)

    # create feedback file.
    app_path = Configuration.spot()
    feedback_path = "{0}/flow/scored_results/{1}{2}{3}/feedback" \
    .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))

    append_file = True
    if len(HDFSClient.list_dir(feedback_path)) == 0:
        # First write for this day: prepend the CSV header row.
        fb_data.insert(0,["sev","tstart","sip","dip","sport","dport","ipkt","ibyt"])
        append_file = False

    HDFSClient.put_file_csv(fb_data,feedback_path,"ml_feedback.csv",\
    append_file=append_file)
    return True
示例#16
0
def score_request(date,score,uri):
    """Score a proxy request: record it in proxy_threat_investigation and
    append the scored rows to the ML feedback CSV.

    Returns True when done, or None when neither *score* nor *uri* is given.
    """

    # BUG FIX: this guard (and the feedback-header branch below) was
    # tab-indented while the rest of the function used spaces -- a TabError
    # under Python 3.  Re-indented consistently; behavior is unchanged.
    if not score and not uri:
        return None

    db = Configuration.db()
    # NOTE(review): *uri* is interpolated directly into the SQL text; safe
    # only for trusted input.
    p_query = ("""
        SELECT
            tdate,time,clientip,host,reqmethod,useragent,resconttype
            ,duration,username,webcat,referer,respcode,uriport
            ,uripath,uriquery,serverip,scbytes,csbytes,fulluri
            ,word,ml_score,uri_rep,respcode_name,network_context
        FROM
            {0}.proxy_scores
        WHERE
            y={1} and m={2} and d={3}
            AND fulluri = '{4}'
        """).format(db,date.year,date.month,date.day,uri)

    connections = ImpalaEngine.execute_query(p_query)

    # add score to connections
    insert_command = ("""
        INSERT INTO {0}.proxy_threat_investigation PARTITION (y={1},m={2},d={3})
        VALUES (""") \
        .format(db,date.year,date.month,date.day)

    fb_data =  []
    first = True
    num_rows = 0
    for row in connections:
        cip_index = row[2]
        uri_index = row[18]
        # NOTE(review): tme_index reads row[2] (clientip, same as cip_index)
        # yet is split on ":" like a time value -- row[1] was probably
        # intended.  Left untouched to preserve existing hash values.
        tme_index = row[2]
        hash_field = [str( md5.new(str(cip_index) + str(uri_index)).hexdigest() \
        + str((tme_index.split(":"))[0]) )]

        threat_data = (row[0],row[18],score)
        fb_data.append([row[0],row[1],row[2],row[3],row[4],row[5],row[6],row[7] \
            ,row[8],row[9],row[10],row[11],row[12],row[13],row[14],row[15] \
            ,row[16],row[17],row[18],row[19],score,row[20],row[21],row[22], \
            row[23],hash_field])
        insert_command += "{0}{1}".format("," if not first else "", threat_data)
        first = False
        num_rows += 1

    insert_command += ")"
    if num_rows > 0: ImpalaEngine.execute_query(insert_command)

    # create feedback file.
    app_path = Configuration.spot()
    feedback_path = "{0}/proxy/scored_results/{1}{2}{3}/feedback"\
    .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))

    ap_file = True
    if len(HDFSClient.list_dir(feedback_path)) == 0:
        # First write for this day: prepend the CSV header row.
        fb_data.insert(0,["p_date","p_time","clientip","host","reqmethod",\
        "useragent","resconttype","duration","username","webcat","referer",\
        "respcode","uriport","uripath","uriquery","serverip","scbytes","csbytes",\
        "fulluri","word","score","uri_rep","uri_sev","respcode_name",\
        "network_context","hash"])
        ap_file = False

    HDFSClient.put_file_csv(fb_data,feedback_path,"ml_feedback.csv",append_file=ap_file)
    return True
示例#17
0
def score_connection(date, ip="", dns="", ip_sev=0, dns_sev=0):
    """Score DNS connections matching *ip* and/or *dns*.

    Inserts the matching dns_scores rows into dns_threat_investigation
    with the given severities and appends them to the ML feedback CSV.
    Returns True, or False when no target/severity pair was supplied.
    """

    if (not ip and not ip_sev) and (not dns and not dns_sev):
        return False

    db = Configuration.db()
    # NOTE(review): values are interpolated directly into the SQL text;
    # inputs must be trusted (no escaping/parameterization here).
    sq_query = ("""
		SELECT
    	    frame_time,unix_tstamp,frame_len,ip_dst,dns_qry_name,dns_qry_class,
		    dns_qry_type,dns_qry_rcode,ml_score,tld,query_rep,
		    hh,dns_qry_class_name,dns_qry_type_name,dns_qry_rcode_name,
		    network_context
		FROM
		    {0}.dns_scores
		WHERE
		    y={1} and m={2} and d={3}
            AND (
		""").format(db, date.year, date.month, date.day)

    # Build the "(ip_dst = ... OR dns_qry_name = ...)" tail of the query
    # from whichever targets were supplied.
    connections_filter = ""
    connections_filter += "ip_dst = '{0}' ".format(ip) if ip else ""
    connections_filter += " OR " if ip and dns else ""
    connections_filter += "dns_qry_name = '{0}' ".format(dns) if dns else ""
    connections_filter += ")"
    connections = ImpalaEngine.execute_query(sq_query + connections_filter)

    # add score to connections

    insert_command = ("""INSERT INTO {0}.dns_threat_investigation
                        PARTITION (y={1},m={2},d={3})
                        VALUES (""") \
                        .format(db,date.year,date.month,date.day)

    fb_data = []
    first = True
    num_rows = 0
    for row in connections:
        # insert into dns_threat_investigation.
        # A severity applies only to the side (ip / dns) that matched.
        threat_data = (row[1],row[3],row[4],ip_sev if ip == row[3] else 0,\
        dns_sev if dns == row[4] else 0)

        fb_data.append([row[0],row[2],row[3],row[4],row[5],row[6],row[7],\
        row[8],row[9],row[10],row[11],ip_sev,dns_sev,row[12],row[13],row[14],\
        row[15],row[1]])

        insert_command += "{0}{1}".format("," if not first else "",
                                          threat_data)
        first = False
        num_rows += 1

    insert_command += ")"
    if num_rows > 0: ImpalaEngine.execute_query(insert_command)

    # create feedback file.
    app_path = Configuration.spot()
    feedback_path = "{0}/dns/scored_results/{1}{2}{3}/feedback"\
    .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))
    ap_file = True

    if len(HDFSClient.list_dir(feedback_path)) == 0:
        # First write for this day: prepend the CSV header row.
        fb_data.insert(0,["frame_time","frame_len","ip_dst","dns_qry_name",\
        "dns_qry_class","dns_qry_type","dns_qry_rcode","score","tld","query_rep",\
        "hh","ip_sev","dns_sev","dns_qry_class_name","dns_qry_type_name",\
        "dns_qry_rcode_name","network_context","unix_tstamp"])
        ap_file = False

    HDFSClient.put_file_csv(fb_data,
                            feedback_path,
                            "ml_feedback.csv",
                            append_file=ap_file)
    return True
示例#18
0
def create_timeline(anchor,clientips,date,top_results):
    """Rebuild the proxy timeline for the threat *anchor* on *date*.

    Picks the *top_results* client IPs with the most hits, preserves the
    timeline rows of other threats, drops and recreates the partition,
    then aggregates the matching proxy rows for *anchor*.
    Returns a status message.
    """
    response = ""
    susp_ips = []

    if clientips:
        # Rank client IPs by hit count; keep the busiest top_results ones.
        srtlist = sorted(list(clientips.items()), key=lambda x: x[1], reverse=True)
        for val in srtlist[:top_results]:
            susp_ips.append(val[0])

    if anchor != "":
        db = Configuration.db()
        # NOTE(review): anchor.replace("'","//'") does not actually escape
        # a SQL single quote (doubling the quote would); values are
        # interpolated directly, so inputs must be trusted.
        time_line_query = ("""
                SELECT p_threat,tstart,tend,duration,clientip,respcode,respcodename
                FROM {0}.proxy_timeline
                WHERE
                    y={1} AND m={2} AND d={3} AND p_threat != '{4}'
                """).format(db,date.year,date.month,date.day,anchor.replace("'","//'"))

        tmp_timeline_data = ImpalaEngine.execute_query_as_list(time_line_query)

        imp_query = ("""
                        INSERT INTO TABLE {0}.proxy_timeline
                        PARTITION (y={2}, m={3},d={4})
                        SELECT
                            '{7}' as p_threat, concat(cast(p_date as string),
                            ' ', cast(MIN(p_time) as string)) AS tstart,
                            concat(cast(p_date as string), ' ',
                            cast(MAX(p_time) as string)) AS tend,
                            SUM(duration) AS duration,
                            clientip, respcode,"respCodeName" as respCodeName
                        FROM {0}.proxy
                        WHERE fulluri='{1}' AND clientip IN ({5})
                        AND y='{2}' AND m='{3}' AND d='{4}'
                        GROUP BY clientip, p_time, respcode, p_date
                        LIMIT {6}
                    """)\
                    .format(db,anchor,date.year,str(date.month).zfill(2),\
                    str(date.day).zfill(2),("'" + "','".join(susp_ips) + "'")\
                    ,top_results,anchor)

        app_path = Configuration.spot()
        old_file = "{0}/proxy/hive/oa/timeline/y={1}/m={2}/d={3}"\
        .format(app_path,date.year,date.month,date.day)

        # Rebuild the partition: drop the folder, refresh metadata, restore
        # the other threats' rows, then insert the new aggregation.
        HDFSClient.delete_folder(old_file,"impala")
        ImpalaEngine.execute_query("invalidate metadata")

        #Insert temporary values
        for item in tmp_timeline_data:
            insert_query = ("""
                        INSERT INTO {0}.proxy_timeline PARTITION(y={1} , m={2} ,d={3})
                        VALUES ('{4}', '{5}', '{6}',{7},'{8}','{9}','{10}')
                        """)\
                        .format(db,date.year,date.month,date.day,\
                        item["p_threat"],item["tstart"],item["tend"],item["duration"],item["clientip"],item["respcode"],item["respcodename"])

            ImpalaEngine.execute_query(insert_query)

        ImpalaEngine.execute_query(imp_query)
        response = "Timeline successfully saved"
    else:
        response = "Timeline couldn't be created"
    # BUG FIX: the status message was computed but never returned (implicit
    # None); return it so callers can report the outcome.
    return response
示例#19
0
def create_impact_analysis(anchor, inbound, outbound, twoway, threat_name,date):
    """Build and store the impact-analysis stats JSON for *anchor*.

    The document sizes each connection direction ('Inbound Only',
    'Outbound Only', 'two way') and, within each, counts peers per
    network context (shortened via get_ctx_name).  Returns a status
    message.
    """
    stats_fpath = 'stats-' + anchor.replace('.','_') + ".json"

    obj = {
        'name':threat_name,
        'children': [],
        'size': len(inbound) + len(outbound) + len(twoway)
    }

    def _ctx_counts(conns):
        # Count peers per (shortened) network-context name; peers without
        # an 'nwloc' entry fall into the default context for ''.
        counts = {}
        for ip in conns:
            full_ctx = ''
            if 'nwloc' in conns[ip] and len(conns[ip]['nwloc']) > 0:
                full_ctx = conns[ip]['nwloc'][2].split('.')[0]
            ctx = get_ctx_name(full_ctx)
            if ctx not in counts:
                counts[ctx] = 1
            else:
                counts[ctx] += 1
        return counts

    # The three direction sections shared identical counting logic; it is
    # consolidated into _ctx_counts above.
    sections = (('Inbound Only', inbound),
                ('Outbound Only', outbound),
                ('two way', twoway))
    for idx, (label, conns) in enumerate(sections):
        obj["children"].append({'name': label, 'children': [], 'size': len(conns)})
        counts = _ctx_counts(conns)
        for ctx in counts:
            obj["children"][idx]['children'].append({
                    'name': ctx,
                    'size': counts[ctx]
                })

    app_path = Configuration.spot()
    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
    .format(app_path,date.year,date.month,date.day,anchor.replace(".","_"))

    # Dead `data = json.dumps(obj)` removed: the serialized string was never
    # used -- put_file_json receives the dict itself.
    if HDFSClient.put_file_json(obj,hdfs_path,stats_fpath,overwrite_file=True):
        return "Stats file successfully created \n"
    else:
        return "Stats file couldn't be created \n"
示例#20
0
def score_connection(score,date,src_ip=None,dst_ip=None,src_port=None,dst_port=None):
    """Score the flow connections matching the given filters.

    Inserts the matching flow_scores rows (with *score*) into
    flow_threat_investigation and appends them to the ML feedback CSV.
    Returns True, or False when no filter was supplied at all.
    """

    if not src_ip and not dst_ip and not src_port and not dst_port:
        return False

    db = Configuration.db()
    # get connections to score
    # NOTE(review): filter values are interpolated directly into the SQL
    # text; inputs must be trusted (no escaping/parameterization here).
    connections_query = ("""
            SELECT
                tstart,srcip,dstip,sport,dport, ibyt,ipkt
            FROM {0}.flow_scores
            WHERE
                y = {1} AND m={2} AND d={3}
            """).format(db,date.year,date.month,date.day)

    # Each filter fragment is appended only when its argument was provided.
    connections_filter = ""
    connections_filter += " AND srcip = '{0}'".format(src_ip) if src_ip else ""
    connections_filter += " AND dstip = '{0}'".format(dst_ip) if dst_ip else ""

    connections_filter += " AND sport = {0}" \
    .format(str(src_port)) if src_port else ""

    connections_filter += " AND dport = {0}" \
    .format(str(dst_port)) if dst_port else ""
    connections = ImpalaEngine.execute_query(connections_query + connections_filter)


    # add score to connections
    # A single multi-row statement is assembled; each tuple's str() form
    # supplies the per-row parentheses and commas.
    insert_command = ("""
        INSERT INTO {0}.flow_threat_investigation
        PARTITION (y={1},m={2},d={3})
        VALUES (""") \
        .format(db,date.year,date.month,date.day)

    fb_data =  []
    first = True
    num_rows = 0
    for row in connections:
        # insert into flow_threat_investigation.
        threat_data = (row[0],row[1],row[2],row[3],row[4],score)
        fb_data.append([score,row[0],row[1],row[2],row[3],row[4],row[5],row[6]])
        insert_command += "{0}{1}".format("," if not first else "", threat_data)
        first = False
        num_rows += 1

    insert_command += ")"
    if num_rows > 0: ImpalaEngine.execute_query(insert_command)

    # create feedback file.
    app_path = Configuration.spot()
    feedback_path = "{0}/flow/scored_results/{1}{2}{3}/feedback" \
    .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))

    append_file = True
    if len(HDFSClient.list_dir(feedback_path)) == 0:
        # First write for this day: prepend the CSV header row.
        fb_data.insert(0,["sev","tstart","sip","dip","sport","dport","ipkt","ibyt"])
        append_file = False

    HDFSClient.put_file_csv(fb_data,feedback_path,"ml_feedback.csv",\
    append_file=append_file)
    return True
示例#21
0
def create_timeline(anchor, clientips, date, top_results):
    """Rebuild the proxy timeline for the threat *anchor* on *date*.

    Picks the *top_results* client IPs with the most hits, preserves the
    timeline rows of other threats, drops and recreates the partition,
    then aggregates the matching proxy rows for *anchor*.
    Returns a status message.
    """
    response = ""
    susp_ips = []

    if clientips:
        # Rank client IPs by hit count; keep the busiest top_results ones.
        srtlist = sorted(list(clientips.items()),
                         key=lambda x: x[1],
                         reverse=True)
        for val in srtlist[:top_results]:
            susp_ips.append(val[0])

    if anchor != "":
        db = Configuration.db()
        # NOTE(review): anchor.replace("'", "//'") does not actually escape
        # a SQL single quote (doubling the quote would); values are
        # interpolated directly, so inputs must be trusted.
        time_line_query = ("""
                SELECT p_threat,tstart,tend,duration,clientip,respcode,respcodename
                FROM {0}.proxy_timeline
                WHERE
                    y={1} AND m={2} AND d={3} AND p_threat != '{4}'
                """).format(db, date.year, date.month, date.day,
                            anchor.replace("'", "//'"))

        tmp_timeline_data = ImpalaEngine.execute_query_as_list(time_line_query)

        imp_query = ("""
                        INSERT INTO TABLE {0}.proxy_timeline
                        PARTITION (y={2}, m={3},d={4})
                        SELECT
                            '{7}' as p_threat, concat(cast(p_date as string),
                            ' ', cast(MIN(p_time) as string)) AS tstart,
                            concat(cast(p_date as string), ' ',
                            cast(MAX(p_time) as string)) AS tend,
                            SUM(duration) AS duration,
                            clientip, respcode,"respCodeName" as respCodeName
                        FROM {0}.proxy
                        WHERE fulluri='{1}' AND clientip IN ({5})
                        AND y='{2}' AND m='{3}' AND d='{4}'
                        GROUP BY clientip, p_time, respcode, p_date
                        LIMIT {6}
                    """)\
                    .format(db,anchor,date.year,str(date.month).zfill(2),\
                    str(date.day).zfill(2),("'" + "','".join(susp_ips) + "'")\
                    ,top_results,anchor)

        app_path = Configuration.spot()
        old_file = "{0}/proxy/hive/oa/timeline/y={1}/m={2}/d={3}"\
        .format(app_path,date.year,date.month,date.day)

        # Rebuild the partition: drop the folder, refresh metadata, restore
        # the other threats' rows, then insert the new aggregation.
        HDFSClient.delete_folder(old_file, "impala")
        ImpalaEngine.execute_query("invalidate metadata")

        #Insert temporary values
        for item in tmp_timeline_data:
            insert_query = ("""
                        INSERT INTO {0}.proxy_timeline PARTITION(y={1} , m={2} ,d={3})
                        VALUES ('{4}', '{5}', '{6}',{7},'{8}','{9}','{10}')
                        """)\
                        .format(db,date.year,date.month,date.day,\
                        item["p_threat"],item["tstart"],item["tend"],item["duration"],item["clientip"],item["respcode"],item["respcodename"])

            ImpalaEngine.execute_query(insert_query)

        ImpalaEngine.execute_query(imp_query)
        response = "Timeline successfully saved"
    else:
        response = "Timeline couldn't be created"
    # BUG FIX: the status message was computed but never returned (implicit
    # None); return it so callers can report the outcome.
    return response
示例#22
0
def create_incident_progression(anchor, inbound, outbound, twoway, date):
    """
    Build the threat-dendrogram JSON for a suspicious IP and store it in
    the HDFS storyboard folder for the given date.

    :param anchor: IP under investigation (dots become underscores in the
        output file name).
    :param inbound: dict of inbound-only connections keyed by IP; values
        may carry an 'nwloc' list whose third element is the network
        location context used for grouping.
    :param outbound: dict of outbound-only connections, same shape.
    :param twoway: dict of two-way connections, same shape.
    :param date: date object used to build the HDFS path.
    :returns: status message string.
    """
    dendro_fpath = 'threat-dendro-' + anchor.replace('.', '_') + ".json"
    obj = {
        'name': anchor,
        'children': [],
        'time': ""
    }

    def _append_group(label, connections):
        # Append one {name, children, impact} node for this traffic
        # direction, counting connections per network-location context.
        # Holding a direct reference to the node (instead of indexing
        # obj["children"] by a fixed position, as the previous version
        # did) avoids an IndexError when an earlier group was empty and
        # therefore never appended.
        if len(connections) == 0:
            return
        node = {'name': label, 'children': [], 'impact': 0}
        obj["children"].append(node)
        ctx_counts = {}
        for conn_ip in connections:
            conn = connections[conn_ip]
            if 'nwloc' in conn and len(conn['nwloc']) > 0:
                ctx = conn['nwloc'][2]
                ctx_counts[ctx] = ctx_counts.get(ctx, 0) + 1
        for ctx in ctx_counts:
            node['children'].append({
                    'name': ctx,
                    'impact': ctx_counts[ctx]
                })

    #----- Add Inbound / Outbound / TwoWay groups -------#
    _append_group('Inbound Only', inbound)
    _append_group('Outbound Only', outbound)
    _append_group('two way', twoway)

    app_path = Configuration.spot()
    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
    .format(app_path,date.year,date.month,date.day,anchor.replace(".","_"))

    if HDFSClient.put_file_json(obj,hdfs_path,dendro_fpath,overwrite_file=True):
        return "Incident progression successfully created \n"
    else:
        return "Incident progression couldn't be created \n"
示例#23
0
def create_map_view(ip, inbound, outbound, twoway, date, iploc):
    """
    Build the GeoJSON-style globe file for a suspicious IP's storyboard
    and store it in HDFS.

    :param ip: IP under investigation (dots become underscores in the
        output file name).
    :param inbound: dict keyed by IP; values are expected to carry 'geo'
        and 'geo_src' location rows plus a 'src_ip' field.
    :param outbound: dict keyed by IP; values are expected to carry 'geo'
        and 'geo_dst' location rows plus a 'dst_ip' field.
    :param twoway: dict keyed by IP; values are expected to carry a 'geo'
        location row.
    :param date: date object used to build the HDFS path.
    :param iploc: path to the local iploc.csv geolocation catalog; when it
        does not exist the map is skipped and "" is returned.
    :returns: status message string ("" when no iploc file was found).
    """
    globe_fpath = 'globe-' + ip.replace('.', '_') + ".json"

    # Load the geolocation catalog only to confirm it is usable; the geo
    # rows consumed below already live in the connection dictionaries.
    # A None sentinel replaces the previous `iplist != ''` check, which
    # compared a numpy array against a string (an ambiguous element-wise
    # comparison under modern numpy).
    iplist = None
    if os.path.isfile(iploc):
        # First CSV column only; strip surrounding quotes before casting.
        iplist = np.loadtxt(iploc,dtype=np.uint32,delimiter=',',usecols={0},\
        converters={0: lambda s: np.uint32(s.replace('"',''))})
    else:
        print("No iploc.csv file was found, Map View map won't be created")

    response = ""
    if iplist is not None:

        globe_json = {}
        globe_json['type'] = "FeatureCollection"
        globe_json['sourceips'] = []
        globe_json['destips'] = []

        # Two-way peers: single destination point per peer.
        # Geo row layout (from SOURCE usage): [6]=lat, [7]=lon, [8]=label.
        for srcip in twoway:
            try:
                row = twoway[srcip]['geo']
                globe_json['destips'].append({
                        'type': 'Feature',
                        'properties': {
                            'location':row[8],
                            'ip':srcip,
                            'type':1
                        },
                        'geometry': {
                            'type': 'Point',
                            'coordinates': [float(row[7]), float(row[6])]
                        }
                    })
            except ValueError:
                # Skip peers whose coordinates are not parseable floats.
                pass

        # Outbound peers: one source point plus one destination point.
        for dstip in outbound:
            try:
                row = outbound[dstip]['geo']
                dst_geo = outbound[dstip]['geo_dst']
                globe_json['sourceips'].append({
                        'type': 'Feature',
                        'properties': {
                            'location':row[8],
                            'ip':dstip,
                            'type':3
                        },
                        'geometry': {
                            'type': 'Point',
                            'coordinates': [float(row[7]), float(row[6])]
                        }
                    })
                globe_json['destips'].append({
                        'type': 'Feature',
                        'properties': {
                            'location':row[8],
                            'ip':outbound[dstip]['dst_ip'],
                            'type':3
                        },
                        'geometry': {
                            'type': 'Point',
                            'coordinates': [float(dst_geo[7]), float(dst_geo[6])]
                        }
                    })
            except ValueError:
                pass

        # Inbound peers: one source point plus one destination point.
        for dstip in inbound:
            try:
                row = inbound[dstip]['geo']
                dst_geo = inbound[dstip]['geo_src']
                globe_json['sourceips'].append({
                        'type': 'Feature',
                        'properties': {
                            'location':row[8],
                            'ip':dstip,
                            'type':2
                        },
                        'geometry': {
                            'type': 'Point',
                            'coordinates': [float(row[7]), float(row[6])]
                        }
                    })
                globe_json['destips'].append({
                        'type': 'Feature',
                        'properties': {
                            'location':row[8],
                            'ip':inbound[dstip]['src_ip'],
                            'type':2
                        },
                        'geometry': {
                            'type': 'Point',
                            'coordinates': [float(dst_geo[7]), float(dst_geo[6])]
                        }
                    })
            except ValueError:
                pass

        app_path = Configuration.spot()
        hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
        .format(app_path,date.year,date.month,date.day,ip.replace(".","_"))

        if HDFSClient.put_file_json(globe_json,hdfs_path,globe_fpath,overwrite_file=True) :
            response = "Geolocation map successfully created \n"
        else:
            # Fixed message: reaching this branch means the HDFS write
            # failed; the missing-iploc case was already handled above.
            response = "Geolocation map couldn't be created \n"

    return response
示例#24
0
def create_impact_analysis(anchor, inbound, outbound, twoway, threat_name, date):
    """
    Build the impact-analysis stats JSON for a threat and store it in the
    HDFS storyboard folder for the given date.

    :param anchor: IP under investigation (dots become underscores in the
        output file name).
    :param inbound: dict of inbound-only connections keyed by IP; values
        may carry an 'nwloc' list whose third element holds the network
        location, of which only the part before the first '.' is used.
    :param outbound: dict of outbound-only connections, same shape.
    :param twoway: dict of two-way connections, same shape.
    :param threat_name: display name for the root node of the stats tree.
    :param date: date object used to build the HDFS path.
    :returns: status message string.
    """
    stats_fpath = 'stats-' + anchor.replace('.', '_') + ".json"

    def _context_counts(connections):
        # Map get_ctx_name(context) -> number of connections in it.
        # Connections without a usable 'nwloc' entry are counted under
        # the name get_ctx_name returns for the empty string.
        counts = {}
        for conn_ip in connections:
            full_ctx = ''
            conn = connections[conn_ip]
            if 'nwloc' in conn and len(conn['nwloc']) > 0:
                full_ctx = conn['nwloc'][2].split('.')[0]
            ctx = get_ctx_name(full_ctx)
            counts[ctx] = counts.get(ctx, 0) + 1
        return counts

    obj = {
        'name':threat_name,
        'children': [],
        'size': len(inbound) + len(outbound) + len(twoway)
    }

    # Unlike the dendrogram, each direction group is added even when
    # empty, matching the original output shape.
    for label, connections in (('Inbound Only', inbound),
                               ('Outbound Only', outbound),
                               ('two way', twoway)):
        node = {'name': label, 'children': [], 'size': len(connections)}
        obj["children"].append(node)
        counts = _context_counts(connections)
        for ctx in counts:
            node['children'].append({
                    'name': ctx,
                    'size': counts[ctx]
                })

    app_path = Configuration.spot()
    hdfs_path = "{0}/flow/oa/storyboard/{1}/{2}/{3}/{4}" \
    .format(app_path,date.year,date.month,date.day,anchor.replace(".","_"))

    # Note: a dead `data = json.dumps(obj)` was removed; put_file_json
    # receives the dict directly.
    if HDFSClient.put_file_json(obj,hdfs_path,stats_fpath,overwrite_file=True):
        return "Stats file successfully created \n"
    else:
        return "Stats file couldn't be created \n"
示例#25
0
def score_request(date, score, uri):
    """
    Record an analyst score for a proxy URI: copy the matching rows from
    proxy_scores into proxy_threat_investigation with the score attached,
    and append the scored rows to the day's ML feedback CSV in HDFS.

    :param date: date object selecting the y/m/d partition.
    :param score: severity score assigned by the analyst.
    :param uri: full URI being scored.
    :returns: True on completion, or None when both score and uri are
        empty/falsy.
    """

    if not score and not uri:
        return None

    db = Configuration.db()
    # Column order below fixes the row[i] indices used further down.
    p_query = ("""
		SELECT
		    tdate,time,clientip,host,reqmethod,useragent,resconttype
		    ,duration,username,webcat,referer,respcode,uriport
		    ,uripath,uriquery,serverip,scbytes,csbytes,fulluri
		    ,word,ml_score,uri_rep,respcode_name,network_context
		FROM
		    {0}.proxy_scores
		WHERE
		    y={1} and m={2} and d={3}
		    AND fulluri = '{4}'
		""").format(db, date.year, date.month, date.day, uri)

    connections = ImpalaEngine.execute_query(p_query)

    # add score to connections
    # The VALUES list is built by appending the str() rendering of each
    # threat_data tuple, comma-separated, and closed with ")" at the end.
    insert_command = ("""
		INSERT INTO {0}.proxy_threat_investigation PARTITION (y={1},m={2},d={3})
		VALUES (""") \
        .format(db,date.year,date.month,date.day)

    fb_data = []
    first = True
    num_rows = 0
    for row in connections:
        # row[0]=tdate, row[1]=time, row[2]=clientip, row[18]=fulluri.
        cip_index = row[2]
        uri_index = row[18]
        # NOTE(review): tme_index is assigned row[2] (clientip), not
        # row[1] (time); splitting an IP on ":" just yields the whole IP,
        # so the hash suffix is likely not the intended hour — TODO
        # confirm whether row[1] was meant here.
        tme_index = row[2]
        # `md5` is the Python 2 module; hash of clientip+fulluri plus the
        # ":"-prefix of tme_index, wrapped in a one-element list for the
        # CSV row below.
        hash_field = [str( md5.new(str(cip_index) + str(uri_index)).hexdigest() \
        + str((tme_index.split(":"))[0]) )]

        # (tdate, fulluri, score) tuple rendered directly into the SQL.
        threat_data = (row[0], row[18], score)
        # Feedback CSV row: original columns with the analyst score
        # spliced in after row[19] and the hash list appended last.
        fb_data.append([row[0],row[1],row[2],row[3],row[4],row[5],row[6],row[7] \
   ,row[8],row[9],row[10],row[11],row[12],row[13],row[14],row[15] \
   ,row[16],row[17],row[18],row[19],score,row[20],row[21],row[22], \
   row[23],hash_field])
        insert_command += "{0}{1}".format("," if not first else "",
                                          threat_data)
        first = False
        num_rows += 1

    insert_command += ")"
    # Only issue the INSERT when at least one row matched the URI.
    if num_rows > 0: ImpalaEngine.execute_query(insert_command)

    # create feedback file.
    app_path = Configuration.spot()
    feedback_path = "{0}/proxy/scored_results/{1}{2}{3}/feedback"\
    .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))

    # Write the CSV header only when the feedback folder is empty (first
    # scoring of the day); otherwise append to the existing file.
    ap_file = True
    if len(HDFSClient.list_dir(feedback_path)) == 0:
        fb_data.insert(0,["p_date","p_time","clientip","host","reqmethod",\
           "useragent","resconttype","duration","username","webcat","referer",\
           "respcode","uriport","uripath","uriquery","serverip","scbytes","csbytes",\
           "fulluri","word","score","uri_rep","uri_sev","respcode_name",\
           "network_context","hash"])
        ap_file = False

    HDFSClient.put_file_csv(fb_data,
                            feedback_path,
                            "ml_feedback.csv",
                            append_file=ap_file)
    return True
示例#26
0
def  score_connection(date,ip="", dns="", ip_sev=0, dns_sev=0):
    """
    Record an analyst score for a DNS connection: copy the matching rows
    from dns_scores into dns_threat_investigation with the IP/DNS
    severities attached, and append the scored rows to the day's ML
    feedback CSV in HDFS.

    :param date: date object selecting the y/m/d partition.
    :param ip: destination IP to score (optional).
    :param dns: DNS query name to score (optional).
    :param ip_sev: severity assigned to rows matching `ip`.
    :param dns_sev: severity assigned to rows matching `dns`.
    :returns: True on completion, or False when neither the (ip, ip_sev)
        nor the (dns, dns_sev) pair carries any value.
    """

    if (not ip and not ip_sev) and (not dns and not dns_sev):
        return False

    db = Configuration.db()
    # Column order below fixes the row[i] indices used further down.
    # The WHERE clause is left open with "AND (" and completed by the
    # filter string built next.
    sq_query = ("""
		SELECT
    	    frame_time,unix_tstamp,frame_len,ip_dst,dns_qry_name,dns_qry_class,
		    dns_qry_type,dns_qry_rcode,ml_score,tld,query_rep,
		    hh,dns_qry_class_name,dns_qry_type_name,dns_qry_rcode_name,
		    network_context
		FROM
		    {0}.dns_scores
		WHERE
		    y={1} and m={2} and d={3}
            AND (
		""").format(db,date.year,date.month,date.day)

    # Build "ip_dst = .. OR dns_qry_name = .." from whichever of ip/dns
    # were supplied, then close the parenthesis opened in sq_query.
    connections_filter = ""
    connections_filter += "ip_dst = '{0}' ".format(ip) if ip else ""
    connections_filter += " OR " if ip and dns else ""
    connections_filter += "dns_qry_name = '{0}' ".format(dns) if dns else ""
    connections_filter += ")"
    connections = ImpalaEngine.execute_query(sq_query + connections_filter)

    # add score to connections
    # The VALUES list is built by appending the str() rendering of each
    # threat_data tuple, comma-separated, and closed with ")" at the end.

    insert_command = ("""INSERT INTO {0}.dns_threat_investigation
                        PARTITION (y={1},m={2},d={3})
                        VALUES (""") \
                        .format(db,date.year,date.month,date.day)

    fb_data =  []
    first = True
    num_rows = 0
    for row in connections:
        # insert into dns_threat_investigation.
        # row[1]=unix_tstamp, row[3]=ip_dst, row[4]=dns_qry_name; each
        # severity applies only to rows matching its own criterion.
        threat_data = (row[1],row[3],row[4],ip_sev if ip == row[3] else 0,\
        dns_sev if dns == row[4] else 0)

        # Feedback CSV row: original columns with ip_sev/dns_sev spliced
        # in after row[11] and unix_tstamp moved to the end.
        fb_data.append([row[0],row[2],row[3],row[4],row[5],row[6],row[7],\
        row[8],row[9],row[10],row[11],ip_sev,dns_sev,row[12],row[13],row[14],\
        row[15],row[1]])

        insert_command += "{0}{1}".format("," if not first else "", threat_data)
        first = False
        num_rows += 1

    insert_command += ")"
    # Only issue the INSERT when at least one row matched the filter.
    if num_rows > 0: ImpalaEngine.execute_query(insert_command)

    # create feedback file.
    app_path = Configuration.spot()
    feedback_path = "{0}/dns/scored_results/{1}{2}{3}/feedback"\
    .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))
    ap_file = True

    # Write the CSV header only when the feedback folder is empty (first
    # scoring of the day); otherwise append to the existing file.
    if len(HDFSClient.list_dir(feedback_path)) == 0:
        fb_data.insert(0,["frame_time","frame_len","ip_dst","dns_qry_name",\
        "dns_qry_class","dns_qry_type","dns_qry_rcode","score","tld","query_rep",\
        "hh","ip_sev","dns_sev","dns_qry_class_name","dns_qry_type_name",\
        "dns_qry_rcode_name","network_context","unix_tstamp"])
        ap_file = False

    HDFSClient.put_file_csv(fb_data,feedback_path,"ml_feedback.csv",append_file=ap_file)
    return True