Exemplo n.º 1
0
def expanded_search(date, query=None, ip=None, limit=20):

    if not ip and not query:
        return False

    db = Configuration.db()
    if ip:
        count = "dns_qry_name"
        filter_param = "ip_dst"
        filter_value = ip
    else:
        count = "ip_dst"
        filter_param = "dns_qry_name"
        filter_value = query

    expanded_query = ("""
   		SELECT
    		    COUNT({0}) as total,dns_qry_name,ip_dst
		FROM
		    {1}.dns
		WHERE y={2} AND m={3} AND d={4}
		AND {5} = '{6}'
		GROUP BY {0},{5}
		ORDER BY total DESC
		LIMIT {7}
    """).format(count,db,date.year,date.month,date.day,\
    filter_param,filter_value,limit if limit else 20)

    return ImpalaEngine.execute_query_as_list(expanded_query)
Exemplo n.º 2
0
def suspicious_requests(date, uri=None, ip=None, limit=250):

    db = Configuration.db()
    proxy_query = ("""
	SELECT STRAIGHT_JOIN
	    ps.tdate,ps.time,ps.clientip,ps.host,ps.reqmethod,ps.useragent,
        ps.resconttype,ps.duration,ps.username,ps.webcat,ps.referer,
        ps.respcode,ps.uriport,ps.uripath,ps.uriquery,ps.serverip,ps.scbytes,
        ps.csbytes,ps.fulluri,ps.ml_score,ps.uri_rep,ps.respcode_name,
        ps.network_context
	FROM
	    {0}.proxy_scores ps
	LEFT JOIN
	    {0}.proxy_threat_investigation pt
	    ON (ps.fulluri = pt.fulluri)
	WHERE
	    ps.y={1} AND ps.m={2} AND ps.d={3}
	    AND (pt.fulluri is NULL)
    """).format(db, date.year, date.month, date.day)

    p_filter = ""
    p_filter += " AND ps.fulluri LIKE '%{0}%'".format(uri) if uri else ""
    p_filter += " AND ps.clientip = '{0}'".format(ip) if ip else ""
    p_filter += " ORDER BY ps.ml_score limit {0}".format(limit)
    proxy_query = proxy_query + p_filter
    return ImpalaEngine.execute_query_as_list(proxy_query)
Exemplo n.º 3
0
def expanded_search(date,query=None,ip=None,limit=20):

    if not ip and not query:
        return False

    db = Configuration.db()
    if ip:
	count = "dns_qry_name"
        filter_param = "ip_dst"
	filter_value = ip
    else:
	count = "ip_dst"
	filter_param = "dns_qry_name"
	filter_value = query

    expanded_query = ("""
   		SELECT
    		    COUNT({0}) as total,dns_qry_name,ip_dst
		FROM
		    {1}.dns
		WHERE y={2} AND m={3} AND d={4}
		AND {5} = '{6}'
		GROUP BY {0},{5}
		ORDER BY total DESC
		LIMIT {7}
    """).format(count,db,date.year,date.month,date.day,\
    filter_param,filter_value,limit if limit else 20)

    return ImpalaEngine.execute_query_as_list(expanded_query)
Exemplo n.º 4
0
def suspicious_requests(date,uri=None,ip=None,limit=250):

    db = Configuration.db()
    proxy_query = ("""
	SELECT STRAIGHT_JOIN
	    ps.tdate,ps.time,ps.clientip,ps.host,ps.reqmethod,ps.useragent,
        ps.resconttype,ps.duration,ps.username,ps.webcat,ps.referer,
        ps.respcode,ps.uriport,ps.uripath,ps.uriquery,ps.serverip,ps.scbytes,
        ps.csbytes,ps.fulluri,ps.ml_score,ps.uri_rep,ps.respcode_name,
        ps.network_context
	FROM
	    {0}.proxy_scores ps
	LEFT JOIN
	    {0}.proxy_threat_investigation pt
	    ON (ps.fulluri = pt.fulluri)
	WHERE
	    ps.y={1} AND ps.m={2} AND ps.d={3}
	    AND (pt.fulluri is NULL)
    """).format(db,date.year,date.month,date.day)


    p_filter = ""
    p_filter += " AND ps.fulluri LIKE '%{0}%'".format(uri) if uri else ""
    p_filter += " AND ps.clientip = '{0}'".format(ip) if ip else ""
    p_filter += " ORDER BY ps.ml_score limit {0}".format(limit)
    proxy_query = proxy_query + p_filter
    return ImpalaEngine.execute_query_as_list(proxy_query)
Exemplo n.º 5
0
def suspicious_queries(date, ip=None, query=None, limit=250):

    db = Configuration.db()
    sq_query = ("""
            SELECT STRAIGHT_JOIN
                ds.unix_tstamp,frame_len,ds.ip_dst,ds.dns_qry_name,
                dns_qry_class,dns_qry_type,dns_qry_rcode,ml_score,tld,
                query_rep,hh,dns_qry_class_name,dns_qry_type_name,
                dns_qry_rcode_name,network_context
            FROM {0}.dns_scores ds
            LEFT JOIN {0}.dns_threat_investigation dt
                ON  (ds.dns_qry_name = dt.dns_qry_name)
            WHERE
                ds.y={1} AND ds.m={2} AND ds.d={3}
                AND (dt.dns_qry_name is NULL)
            """).format(db, date.year, date.month, date.day)

    sq_filter = ""
    sq_filter += " AND ds.ip_dst = '{0}'".format(ip) if ip else ""
    sq_filter += " AND ds.dns_qry_name LIKE '%{0}%'".format(
        query) if query else ""
    sq_filter += " ORDER BY ds.ml_score limit {0}".format(limit)

    sq_query = sq_query + sq_filter
    return ImpalaEngine.execute_query_as_list(sq_query)
Exemplo n.º 6
0
def chord_details(ip,date):

    db = Configuration.db()
    chord_query =  ("""
            SELECT
                srcip,dstip,ibyt,ipkt
            FROM {0}.flow_chords
            WHERE  y={1} AND m={2} AND d={3} AND ip_threat='{4}'
            """).format(db,date.year,date.month,date.day,ip)

    return ImpalaEngine.execute_query_as_list(chord_query)
Exemplo n.º 7
0
def chord_details(ip,date):

    db = Configuration.db()
    chord_query =  ("""
            SELECT
                srcip,dstip,ibyt,ipkt
            FROM {0}.flow_chords
            WHERE  y={1} AND m={2} AND d={3} AND ip_threat='{4}'
            """).format(db,date.year,date.month,date.day,ip)

    return ImpalaEngine.execute_query_as_list(chord_query)
Exemplo n.º 8
0
def get_scored_requests(date):

    db = Configuration.db()
    sc_query = ("""
                SELECT
                    tdate,fulluri,uri_sev
                FROM
                    {0}.proxy_threat_investigation
                WHERE
                    y={1} AND m={2} AND d={3}
                """).format(db, date.year, date.month, date.day)

    return ImpalaEngine.execute_query_as_list(sc_query)
Exemplo n.º 9
0
def time_line(date,uri):

    db = Configuration.db()
    time_line_query = ("""
            SELECT
		p_threat,tstart,tend,duration,clientip,respcode,respcodename
            FROM {0}.proxy_timeline
            WHERE
                y={1} AND m={2} AND d={3}
                AND p_threat = '{4}'
            """).format(db,date.year,date.month,date.day,uri)

    return ImpalaEngine.execute_query_as_list(time_line_query)
Exemplo n.º 10
0
def  get_scored_connections(date):

    db = Configuration.db()
    sc_query =  ("""
                SELECT
                    unix_tstamp,ip_dst,dns_qry_name,ip_sev,dns_sev
                FROM
                    {0}.dns_threat_investigation
                WHERE
                    y={1} AND m={2} AND d={3}
                """).format(db,date.year,date.month,date.day)

    return ImpalaEngine.execute_query_as_list(sc_query)
Exemplo n.º 11
0
def get_scored_requests(date):

    db = Configuration.db()
    sc_query =  ("""
                SELECT
                    tdate,fulluri,uri_sev
                FROM
                    {0}.proxy_threat_investigation
                WHERE
                    y={1} AND m={2} AND d={3}
                """).format(db,date.year,date.month,date.day)

    return ImpalaEngine.execute_query_as_list(sc_query)
Exemplo n.º 12
0
def time_line(date, uri):

    db = Configuration.db()
    time_line_query = ("""
            SELECT
		p_threat,tstart,tend,duration,clientip,respcode,respcodename
            FROM {0}.proxy_timeline
            WHERE
                y={1} AND m={2} AND d={3}
                AND p_threat = '{4}'
            """).format(db, date.year, date.month, date.day, uri)

    return ImpalaEngine.execute_query_as_list(time_line_query)
Exemplo n.º 13
0
def get_scored_connections(date):

    db = Configuration.db()
    sc_query = ("""
                SELECT
                    unix_tstamp,ip_dst,dns_qry_name,ip_sev,dns_sev
                FROM
                    {0}.dns_threat_investigation
                WHERE
                    y={1} AND m={2} AND d={3}
                """).format(db, date.year, date.month, date.day)

    return ImpalaEngine.execute_query_as_list(sc_query)
Exemplo n.º 14
0
def expanded_search(date,uri):

    db = Configuration.db()
    expanded_query = ("""
			SELECT p_date, p_time, clientip, username, duration, fulluri,\
			    webcat, respcode, reqmethod,useragent, resconttype,\
			    referer, uriport, serverip, scbytes, csbytes
			FROM {0}.proxy
			WHERE y='{1}' AND m='{2}' AND d='{3}'
			AND (fulluri='{4}' OR referer ='{4}')
			ORDER BY p_time
			""")\
            .format(db,date.year,str(date.month).zfill(2),str(date.day).zfill(2),uri)
    return ImpalaEngine.execute_query_as_list(expanded_query)
Exemplo n.º 15
0
def client_details(date, ip):

    db = Configuration.db()
    client_query =("""
            SELECT
                ip_dst,dns_a,dns_qry_name,ip_dst
            FROM
                {0}.dns_dendro
            WHERE
                y={1} AND m={2} AND d={3}
                AND ip_dst='{4}'
            """).format(db,date.year,date.month,date.day,ip)

    return ImpalaEngine.execute_query_as_list(client_query)
Exemplo n.º 16
0
def time_line(ip,date):

    db = Configuration.db()
    time_line_query = ("""
        SELECT
            ip_threat,tstart,tend,srcip,dstip,proto,
		    sport,dport,ipkt,ibyt
        FROM {0}.flow_timeline
        WHERE
            y={1} AND m={2} AND d={3}
            AND ip_threat = '{4}'
        """).format(db,date.year,date.month,date.day,ip)

    return ImpalaEngine.execute_query_as_list(time_line_query)
Exemplo n.º 17
0
def get_scored_connections(date):


    db = Configuration.db()
    scored_query = ("""
            SELECT
                tstart,srcip,dstip,srcport,dstport,score
            FROM
                {0}.flow_threat_investigation
            WHERE
                y={1} AND m={2} AND d={3}
            """).format(db,date.year,date.month,date.day)

    return ImpalaEngine.execute_query_as_list(scored_query)
Exemplo n.º 18
0
def get_scored_connections(date):


    db = Configuration.db()
    scored_query = ("""
            SELECT
                tstart,srcip,dstip,srcport,dstport,score
            FROM
                {0}.flow_threat_investigation
            WHERE
                y={1} AND m={2} AND d={3}
            """).format(db,date.year,date.month,date.day)

    return ImpalaEngine.execute_query_as_list(scored_query)
Exemplo n.º 19
0
def expanded_search(date, uri):

    db = Configuration.db()
    expanded_query = ("""
			SELECT p_date, p_time, clientip, username, duration, fulluri,\
			    webcat, respcode, reqmethod,useragent, resconttype,\
			    referer, uriport, serverip, scbytes, csbytes
			FROM {0}.proxy
			WHERE y='{1}' AND m='{2}' AND d='{3}'
			AND (fulluri='{4}' OR referer ='{4}')
			ORDER BY p_time
			""")\
            .format(db,date.year,str(date.month).zfill(2),str(date.day).zfill(2),uri)
    return ImpalaEngine.execute_query_as_list(expanded_query)
Exemplo n.º 20
0
def client_details(date, ip):

    db = Configuration.db()
    client_query = ("""
            SELECT
                ip_dst,dns_a,dns_qry_name,ip_dst
            FROM
                {0}.dns_dendro
            WHERE
                y={1} AND m={2} AND d={3}
                AND ip_dst='{4}'
            """).format(db, date.year, date.month, date.day, ip)

    return ImpalaEngine.execute_query_as_list(client_query)
Exemplo n.º 21
0
def time_line(ip,date):

    db = Configuration.db()
    time_line_query = ("""
        SELECT
            ip_threat,tstart,tend,srcip,dstip,proto,
		    sport,dport,ipkt,ibyt
        FROM {0}.flow_timeline
        WHERE
            y={1} AND m={2} AND d={3}
            AND ip_threat = '{4}'
        """).format(db,date.year,date.month,date.day,ip)

    return ImpalaEngine.execute_query_as_list(time_line_query)
Exemplo n.º 22
0
def ingest_summary(start_date, end_date):

    db = Configuration.db()
    is_query = ("""
                SELECT
                    tdate,total
                FROM {0}.proxy_ingest_summary
                WHERE
                    ( y >= {1} and y <= {2}) AND
                    ( m >= {3} and m <= {4}) AND
                    ( d >= {5} and d <= {6})
                """)\
                .format(db,start_date.year,end_date.year,start_date.month,end_date.month, start_date.day, end_date.day)

    return ImpalaEngine.execute_query_as_list(is_query)
Exemplo n.º 23
0
def ingest_summary(start_date,end_date):

    db = Configuration.db()
    is_query = ("""
                SELECT
                    tdate,total
                FROM {0}.proxy_ingest_summary
                WHERE
                    ( y >= {1} and y <= {2}) AND
                    ( m >= {3} and m <= {4}) AND
                    ( d >= {5} and d <= {6})
                """)\
                .format(db,start_date.year,end_date.year,start_date.month,end_date.month, start_date.day, end_date.day)

    return ImpalaEngine.execute_query_as_list(is_query)
Exemplo n.º 24
0
def ingest_summary(start_date,end_date):

    db = Configuration.db()

    daterange_select = daterange_query(start_date, end_date)

    is_query = ("""
            SELECT
                tdate,total
            FROM {0}.flow_ingest_summary
            WHERE
                {1}
            ORDER BY tdate
            """).format(db, daterange_select)

    return ImpalaEngine.execute_query_as_list(is_query)
Exemplo n.º 25
0
def details(frame_time, query):

    db = Configuration.db()
    details_query = ("""
            SELECT
		unix_tstamp,frame_len,ip_dst,ip_src,dns_qry_name,dns_qry_class,
		dns_qry_type,dns_qry_rcode,dns_a,dns_qry_type_name,
		dns_qry_rcode_name,dns_qry_class_name
            FROM
                {0}.dns_edge
            WHERE
                y={1} AND m={2} AND d={3} AND hh={4} AND dns_qry_name = '{5}'
            """).format(db,frame_time.year,frame_time.month,frame_time.day,\
            frame_time.hour,query)

    return ImpalaEngine.execute_query_as_list(details_query)
Exemplo n.º 26
0
def story_board(date):

    db = Configuration.db()
    sb_query= ("""
            SELECT
                ip_threat,title,text
            FROM
                {0}.flow_storyboard
            WHERE
                y={1} AND m={2} AND d={3}
            """).format(db,date.year,date.month,date.day)

    results = ImpalaEngine.execute_query_as_list(sb_query)
    for row in results:
	       row["text"] = row["text"].replace("\n","\\n")
    return results
Exemplo n.º 27
0
def story_board(date):

    db = Configuration.db()
    sb_query = ("""
            SELECT
                p_threat,title,text
            FROM
                {0}.proxy_storyboard
            WHERE
                y={1} AND m={2} AND d={3}
            """).format(db, date.year, date.month, date.day)

    results = ImpalaEngine.execute_query_as_list(sb_query)
    for row in results:
        row["text"] = row["text"].replace("\n", "\\n")
    return results
Exemplo n.º 28
0
def details(frame_time, query):

    db = Configuration.db()
    details_query = ("""
            SELECT
		unix_tstamp,frame_len,ip_dst,ip_src,dns_qry_name,dns_qry_class,
		dns_qry_type,dns_qry_rcode,dns_a,dns_qry_type_name,
		dns_qry_rcode_name,dns_qry_class_name
            FROM
                {0}.dns_edge
            WHERE
                y={1} AND m={2} AND d={3} AND hh={4} AND dns_qry_name = '{5}'
            """).format(db,frame_time.year,frame_time.month,frame_time.day,\
            frame_time.hour,query)

    return ImpalaEngine.execute_query_as_list(details_query)
Exemplo n.º 29
0
def save_comments(anchor, ip, query, title, text, date):

    db = Configuration.db()
    sb_query = ("""
            SELECT
                ip_threat,dns_threat,title,text
            FROM
                {0}.dns_storyboard
            WHERE
                y = {1} AND m= {2} AND d={3}
            """).format(db, date.year, date.month, date.day)
    sb_data = ImpalaEngine.execute_query_as_list(sb_query)

    # find value if already exists.
    saved = False
    for item in sb_data:
        if item["ip_threat"] == anchor or item["dns_threat"] == anchor:
            item["title"] = title
            item["text"] = text
            saved = True

    if not saved:
        sb_data.append({
            'text': text,
            'ip_threat': str(ip),
            'title': title,
            'dns_threat': query
        })

    #remove old file.
    app_path = Configuration.spot()
    old_file = "{0}/dns/hive/oa/storyboard/y={1}/m={2}/d={3}/"\
    .format(app_path,date.year,date.month,date.day)

    HDFSClient.delete_folder(old_file, "impala")
    ImpalaEngine.execute_query("invalidate metadata")

    for item in sb_data:
        insert_query = ("""
         	INSERT INTO {0}.dns_storyboard PARTITION(y={1} , m={2} ,d={3})
            	VALUES ( '{4}', '{5}', '{6}','{7}')
            	""")\
                       .format(db,date.year,date.month,date.day,\
                       item["ip_threat"],item["dns_threat"],item["title"],item["text"])
        ImpalaEngine.execute_query(insert_query)

    return True
Exemplo n.º 30
0
def details(src_ip,dst_ip,date):

    db = Configuration.db()
    details_query = ("""
            SELECT
                tstart,srcip,dstip,sport,dport,proto,flags,
                tos,ibyt,ipkt,input,output,rip,obyt,opkt
            FROM {0}.flow_edge
            WHERE
                y={1} AND m={2} AND d={3} AND hh={4} AND mn={5}
                AND ((srcip='{6}' AND dstip='{7}')
                OR  (srcip='{7}' AND dstip='{6}'))
            ORDER BY tstart
            """).format(db,date.year,date.month,date.day,date.hour, \
                        date.minute,src_ip,dst_ip)

    return ImpalaEngine.execute_query_as_list(details_query)
Exemplo n.º 31
0
def ingest_summary(start_date,end_date):

    db = Configuration.db()
    is_query = ("""
            SELECT
                tdate,total
            FROM {0}.flow_ingest_summary
            WHERE
                ( y >= {1} AND y <= {2}) AND
                ( m >= {3} AND m <= {4}) AND
                ( d >= {5} AND d <= {6})
            ORDER BY tdate
            """).format(db,start_date.year,end_date.year, \
                        start_date.month,end_date.month, \
                        start_date.day, end_date.day)

    return ImpalaEngine.execute_query_as_list(is_query)
Exemplo n.º 32
0
def details(src_ip,dst_ip,date):

    db = Configuration.db()
    details_query = ("""
            SELECT
                tstart,srcip,dstip,sport,dport,proto,flags,
                tos,ibyt,ipkt,input,output,rip,obyt,opkt
            FROM {0}.flow_edge
            WHERE
                y={1} AND m={2} AND d={3} AND hh={4} AND mn={5}
                AND ((srcip='{6}' AND dstip='{7}')
                OR  (srcip='{7}' AND dstip='{6}'))
            ORDER BY tstart
            """).format(db,date.year,date.month,date.day,date.hour, \
                        date.minute,src_ip,dst_ip)

    return ImpalaEngine.execute_query_as_list(details_query)
Exemplo n.º 33
0
def save_comment(ip,title,text,date):

    #Get current table info.
    db = Configuration.db()
    sb_query = ("""
            SELECT
                ip_threat,title,text
            FROM
                {0}.flow_storyboard
            WHERE
                y = {1} AND m= {2} AND d={3}
            """).format(db,date.year,date.month,date.day)

    sb_data = ImpalaEngine.execute_query_as_list(sb_query)

    # find value if already exists.
    saved = False
    for item in sb_data:
        if item["ip_threat"] == ip:
            item["title"] = title
            item["text"] = text
            saved = True

    if not saved:
        sb_data.append({'text': text, 'ip_threat': str(ip), 'title': title})

    #remove old file.
    app_path = Configuration.spot()
    old_file = "{0}/flow/hive/oa/storyboard/y={1}/m={2}/d={3}/" \
    .format(app_path,date.year,date.month,date.day)

    # remove file manually to allow the comments update.
    HDFSClient.delete_folder(old_file,"impala")
    ImpalaEngine.execute_query("invalidate metadata")

    for item in sb_data:
	insert_query = ("""
         	INSERT INTO {0}.flow_storyboard PARTITION(y={1} , m={2} ,d={3})
            	VALUES ( '{4}', '{5}','{6}')
            	""") \
                .format(db,date.year,date.month,date.day, \
                item["ip_threat"],item["title"],item["text"])

        ImpalaEngine.execute_query(insert_query)
    return True
Exemplo n.º 34
0
def details(date,uri,ip):

    if not uri and not ip:
        return None

    db = Configuration.db()
    p_details = ("""
		SELECT
		    tdate,time,clientIp,host,webcat,respcode,respcode_name
		    ,reqmethod,useragent,resconttype,referer,uriport,serverip
		    ,scbytes,csbytes,fulluri,hh
		FROM
		    {0}.proxy_edge
		WHERE
		    y={1} AND m={2} AND d={3} AND 
            (fulluri='{4}' AND clientIp='{5}')
		""").format(db,date.year,date.month,date.day,uri.replace("'","//'"),ip)
    return ImpalaEngine.execute_query_as_list(p_details)
Exemplo n.º 35
0
def details(date, uri, ip):

    if not uri and not ip:
        return None

    db = Configuration.db()
    p_details = ("""
		SELECT
		    tdate,time,clientIp,host,webcat,respcode,respcode_name
		    ,reqmethod,useragent,resconttype,referer,uriport,serverip
		    ,scbytes,csbytes,fulluri,hh
		FROM
		    {0}.proxy_edge
		WHERE
		    y={1} AND m={2} AND d={3} AND 
            (fulluri='{4}' AND clientIp='{5}')
		""").format(db, date.year, date.month, date.day, uri.replace("'", "//'"), ip)
    return ImpalaEngine.execute_query_as_list(p_details)
Exemplo n.º 36
0
def expanded_search(date,ip):

    db = Configuration.db()
    expanded_query = ("""
		SELECT
		    min(treceived) as firstseen, max(treceived) as lastseen,
            sip as srcip, dip as dstip, sport as sport,
            dport as dport, count(sip) as conns, max(ipkt) as maxpkts,
		    avg(ipkt) as avgpkts, max(ibyt) as maxbyts, avg(ibyt) as avgbyts
	    FROM
		    {0}.flow
        WHERE
	        y={1} AND m={2} AND d={3}
        AND (sip ='{4}'  OR dip='{4}')
        GROUP BY
		    sip, dip,sport,dport
		""").format(db,date.year,date.month,date.day,ip)

    return ImpalaEngine.execute_query_as_list(expanded_query)
Exemplo n.º 37
0
    def _ingest_summary(self):
        # get date parameters.
        yr = self._date[:4]
        mn = self._date[4:6]
        dy = self._date[6:]

        self._logger.info("Getting ingest summary data for the day")

        ingest_summary_cols = ["date", "total"]
        result_rows = []
        df_filtered = pd.DataFrame()

        query_to_load = ("""
            SELECT frame_time, COUNT(*) as total FROM {0}.{1}
            WHERE y={2} AND m={3} AND d={4} AND unix_tstamp IS NOT NULL
            AND frame_time IS NOT NULL AND frame_len IS NOT NULL
            AND dns_qry_name IS NOT NULL AND ip_src IS NOT NULL
            AND (dns_qry_class IS NOT NULL AND dns_qry_type IS NOT NULL
            AND dns_qry_rcode IS NOT NULL ) GROUP BY frame_time;
        """).format(self._db, self._table_name, yr, mn, dy)

        results = impala.execute_query_as_list(query_to_load)
        df = pd.DataFrame(results)

        # Forms a new dataframe splitting the minutes from the time column
        df_new = pd.DataFrame([["{0}-{1}-{2} {3}:{4}".format(yr, mn, dy,\
            val['frame_time'].replace("  "," ").split(" ")[3].split(":")[0].zfill(2),\
            val['frame_time'].replace("  "," ").split(" ")[3].split(":")[1].zfill(2)),\
            int(val['total']) if not math.isnan(val['total']) else 0 ] for key,val in df.iterrows()],columns = ingest_summary_cols)

        #Groups the data by minute
        sf = df_new.groupby(by=['date'])['total'].sum()
        df_per_min = pd.DataFrame({'date': sf.index, 'total': sf.values})

        df_final = df_filtered.append(df_per_min,
                                      ignore_index=True).to_records(
                                          False, False)

        if len(df_final) > 0:
            query_to_insert = ("""
                INSERT INTO {0}.dns_ingest_summary PARTITION (y={1}, m={2}, d={3}) VALUES {4};
            """).format(self._db, yr, mn, dy, tuple(df_final))
            impala.execute_query(query_to_insert)
Exemplo n.º 38
0
def incident_progression(date, query,ip):

    if not ip and not query:
        return None

    db = Configuration.db()
    return_value = "dns_qry_name" if ip else "ip_dst"
    dns_threat_query = ("""
            SELECT
                anchor,total,{0}
            FROM
                {1}.dns_threat_dendro
            WHERE
                y={2} AND m={3} AND d={4}
                AND anchor = '{5}'
            """).format(return_value,db,date.year,date.month,date.day,\
            query if query else ip)
                
    return ImpalaEngine.execute_query_as_list(dns_threat_query)
Exemplo n.º 39
0
def incident_progression(date, query, ip):

    if not ip and not query:
        return None

    db = Configuration.db()
    return_value = "dns_qry_name" if ip else "ip_dst"
    dns_threat_query = ("""
            SELECT
                anchor,total,{0}
            FROM
                {1}.dns_threat_dendro
            WHERE
                y={2} AND m={3} AND d={4}
                AND anchor = '{5}'
            """).format(return_value,db,date.year,date.month,date.day,\
            query if query else ip)

    return ImpalaEngine.execute_query_as_list(dns_threat_query)
Exemplo n.º 40
0
def expanded_search(date,ip):

    db = Configuration.db()
    expanded_query = ("""
		SELECT
		    min(treceived) as firstseen, max(treceived) as lastseen,
            sip as srcip, dip as dstip, sport as sport,
            dport as dport, count(sip) as conns, max(ipkt) as maxpkts,
		    avg(ipkt) as avgpkts, max(ibyt) as maxbyts, avg(ibyt) as avgbyts
	    FROM
		    {0}.flow
        WHERE
	        y={1} AND m={2} AND d={3}
        AND (sip ='{4}'  OR dip='{4}')
        GROUP BY
		    sip, dip,sport,dport
		""").format(db,date.year,date.month,date.day,ip)

    return ImpalaEngine.execute_query_as_list(expanded_query)
Exemplo n.º 41
0
    def _ingest_summary(self):
        # get date parameters.
        yr = self._date[:4]
        mn = self._date[4:6]
        dy = self._date[6:]

        self._logger.info("Getting ingest summary data for the day")
        
        ingest_summary_cols = ["date","total"]		
        result_rows = []        
        df_filtered =  pd.DataFrame()

        query_to_load = ("""
            SELECT frame_time, COUNT(*) as total FROM {0}.{1}
            WHERE y={2} AND m={3} AND d={4} AND unix_tstamp IS NOT NULL
            AND frame_time IS NOT NULL AND frame_len IS NOT NULL
            AND dns_qry_name IS NOT NULL AND ip_src IS NOT NULL
            AND (dns_qry_class IS NOT NULL AND dns_qry_type IS NOT NULL
            AND dns_qry_rcode IS NOT NULL ) GROUP BY frame_time;
        """).format(self._db,self._table_name, yr, mn, dy)

        results = impala.execute_query_as_list(query_to_load)
        df = pd.DataFrame(results)

        # Forms a new dataframe splitting the minutes from the time column
        df_new = pd.DataFrame([["{0}-{1}-{2} {3}:{4}".format(yr, mn, dy,\
            val['frame_time'].replace("  "," ").split(" ")[3].split(":")[0].zfill(2),\
            val['frame_time'].replace("  "," ").split(" ")[3].split(":")[1].zfill(2)),\
            int(val['total']) if not math.isnan(val['total']) else 0 ] for key,val in df.iterrows()],columns = ingest_summary_cols)

        #Groups the data by minute
        sf = df_new.groupby(by=['date'])['total'].sum()
        df_per_min = pd.DataFrame({'date':sf.index, 'total':sf.values})

        df_final = df_filtered.append(df_per_min, ignore_index=True).to_records(False,False)

        if len(df_final) > 0:
            query_to_insert=("""
                INSERT INTO {0}.dns_ingest_summary PARTITION (y={1}, m={2}, d={3}) VALUES {4};
            """).format(self._db, yr, mn, dy, tuple(df_final))
            impala.execute_query(query_to_insert)
Exemplo n.º 42
0
def suspicious_connections(date,ip=None,limit=250):

    db = Configuration.db()
    sc_query = ("""
                SELECT STRAIGHT_JOIN
                    fs.tstart,fs.srcip,fs.dstip,fs.sport,fs.dport,proto,
                    ipkt,ibyt,opkt,obyt,ml_score,rank,srcip_internal,
                    dstip_internal,src_geoloc,dst_geoloc,src_domain,
                    dst_domain,src_rep,dst_rep
                FROM {0}.flow_scores fs
                LEFT JOIN {0}.flow_threat_investigation ft
                    ON (( fs.srcip = ft.srcip) OR ( fs.dstip = ft.dstip))
                WHERE fs.y={1} AND fs.m={2} and fs.d={3}
                    AND ( ft.srcip is NULL AND ft.dstip is NULL )
                """).format(db,date.year,date.month,date.day)

    sc_filter = ""
    if ip:
        sc_filter = " AND ( fs.srcip='{0}' OR fs.dstip='{0}')".format(ip)

    sc_filter += " ORDER BY rank  limit {0}".format(limit)
    sc_query = sc_query + sc_filter
    return ImpalaEngine.execute_query_as_list(sc_query)
Exemplo n.º 43
0
def suspicious_connections(date,ip=None,limit=250):

    db = Configuration.db()
    sc_query = ("""
                SELECT STRAIGHT_JOIN
                    fs.tstart,fs.srcip,fs.dstip,fs.sport,fs.dport,proto,
                    ipkt,ibyt,opkt,obyt,ml_score,rank,srcip_internal,
                    dstip_internal,src_geoloc,dst_geoloc,src_domain,
                    dst_domain,src_rep,dst_rep
                FROM {0}.flow_scores fs
                LEFT JOIN {0}.flow_threat_investigation ft
                    ON (( fs.srcip = ft.srcip) OR ( fs.dstip = ft.dstip))
                WHERE fs.y={1} AND fs.m={2} and fs.d={3}
                    AND ( ft.srcip is NULL AND ft.dstip is NULL )
                """).format(db,date.year,date.month,date.day)

    sc_filter = ""
    if ip:
        sc_filter = " AND ( fs.srcip='{0}' OR fs.dstip='{0}')".format(ip)

    sc_filter += " ORDER BY rank  limit {0}".format(limit)
    sc_query = sc_query + sc_filter
    return ImpalaEngine.execute_query_as_list(sc_query)
Exemplo n.º 44
0
def suspicious_queries(date, ip=None, query=None,limit=250):

    db = Configuration.db()
    sq_query = ("""
            SELECT STRAIGHT_JOIN
                ds.unix_tstamp,frame_len,ds.ip_dst,ds.dns_qry_name,
                dns_qry_class,dns_qry_type,dns_qry_rcode,ml_score,tld,
                query_rep,hh,dns_qry_class_name,dns_qry_type_name,
                dns_qry_rcode_name,network_context
            FROM {0}.dns_scores ds
            LEFT JOIN {0}.dns_threat_investigation dt
                ON  (ds.dns_qry_name = dt.dns_qry_name)
            WHERE
                ds.y={1} AND ds.m={2} AND ds.d={3}
                AND (dt.dns_qry_name is NULL)
            """).format(db,date.year,date.month,date.day)

    sq_filter = ""
    sq_filter += " AND ds.ip_dst = '{0}'".format(ip) if ip else ""
    sq_filter += " AND ds.dns_qry_name LIKE '%{0}%'".format(query) if query else ""
    sq_filter += " ORDER BY ds.ml_score limit {0}".format(limit)

    sq_query = sq_query + sq_filter
    return ImpalaEngine.execute_query_as_list(sq_query)
Exemplo n.º 45
0
def create_timeline(anchor,clientips,date,top_results):
    response = ""
    susp_ips = []

    if clientips:
        srtlist = sorted(list(clientips.items()), key=lambda x: x[1], reverse=True)
        for val in srtlist[:top_results]:
            susp_ips.append(val[0])

    if anchor != "":
        db = Configuration.db()
        time_line_query = ("""
                SELECT p_threat,tstart,tend,duration,clientip,respcode,respcodename
                FROM {0}.proxy_timeline
                WHERE
                    y={1} AND m={2} AND d={3} AND p_threat != '{4}'
                """).format(db,date.year,date.month,date.day,anchor.replace("'","//'"))
        
        tmp_timeline_data = ImpalaEngine.execute_query_as_list(time_line_query)

        imp_query = ("""
                        INSERT INTO TABLE {0}.proxy_timeline
                        PARTITION (y={2}, m={3},d={4})
                        SELECT
                            '{7}' as p_threat, concat(cast(p_date as string),
                            ' ', cast(MIN(p_time) as string)) AS tstart,
                            concat(cast(p_date as string), ' ',
                            cast(MAX(p_time) as string)) AS tend,
                            SUM(duration) AS duration,
                            clientip, respcode,"respCodeName" as respCodeName
                        FROM {0}.proxy
                        WHERE fulluri='{1}' AND clientip IN ({5})
                        AND y='{2}' AND m='{3}' AND d='{4}'
                        GROUP BY clientip, p_time, respcode, p_date
                        LIMIT {6}
                    """)\
                    .format(db,anchor,date.year,str(date.month).zfill(2),\
                    str(date.day).zfill(2),("'" + "','".join(susp_ips) + "'")\
                    ,top_results,anchor)

        app_path = Configuration.spot()
        old_file = "{0}/proxy/hive/oa/timeline/y={1}/m={2}/d={3}"\
        .format(app_path,date.year,date.month,date.day)

        HDFSClient.delete_folder(old_file,"impala")
        ImpalaEngine.execute_query("invalidate metadata")

        #Insert temporary values
        for item in tmp_timeline_data:
            insert_query = ("""
                        INSERT INTO {0}.proxy_timeline PARTITION(y={1} , m={2} ,d={3})
                        VALUES ('{4}', '{5}', '{6}',{7},'{8}','{9}','{10}')
                        """)\
                        .format(db,date.year,date.month,date.day,\
                        item["p_threat"],item["tstart"],item["tend"],item["duration"],item["clientip"],item["respcode"],item["respcodename"])

            ImpalaEngine.execute_query(insert_query)

        ImpalaEngine.execute_query(imp_query)
        response = "Timeline successfully saved"
    else:
        response = "Timeline couldn't be created"
Exemplo n.º 46
0
def create_timeline(anchor, clientips, date, top_results):
    response = ""
    susp_ips = []

    if clientips:
        srtlist = sorted(list(clientips.items()),
                         key=lambda x: x[1],
                         reverse=True)
        for val in srtlist[:top_results]:
            susp_ips.append(val[0])

    if anchor != "":
        db = Configuration.db()
        time_line_query = ("""
                SELECT p_threat,tstart,tend,duration,clientip,respcode,respcodename
                FROM {0}.proxy_timeline
                WHERE
                    y={1} AND m={2} AND d={3} AND p_threat != '{4}'
                """).format(db, date.year, date.month, date.day,
                            anchor.replace("'", "//'"))

        tmp_timeline_data = ImpalaEngine.execute_query_as_list(time_line_query)

        imp_query = ("""
                        INSERT INTO TABLE {0}.proxy_timeline
                        PARTITION (y={2}, m={3},d={4})
                        SELECT
                            '{7}' as p_threat, concat(cast(p_date as string),
                            ' ', cast(MIN(p_time) as string)) AS tstart,
                            concat(cast(p_date as string), ' ',
                            cast(MAX(p_time) as string)) AS tend,
                            SUM(duration) AS duration,
                            clientip, respcode,"respCodeName" as respCodeName
                        FROM {0}.proxy
                        WHERE fulluri='{1}' AND clientip IN ({5})
                        AND y='{2}' AND m='{3}' AND d='{4}'
                        GROUP BY clientip, p_time, respcode, p_date
                        LIMIT {6}
                    """)\
                    .format(db,anchor,date.year,str(date.month).zfill(2),\
                    str(date.day).zfill(2),("'" + "','".join(susp_ips) + "'")\
                    ,top_results,anchor)

        app_path = Configuration.spot()
        old_file = "{0}/proxy/hive/oa/timeline/y={1}/m={2}/d={3}"\
        .format(app_path,date.year,date.month,date.day)

        HDFSClient.delete_folder(old_file, "impala")
        ImpalaEngine.execute_query("invalidate metadata")

        #Insert temporary values
        for item in tmp_timeline_data:
            insert_query = ("""
                        INSERT INTO {0}.proxy_timeline PARTITION(y={1} , m={2} ,d={3})
                        VALUES ('{4}', '{5}', '{6}',{7},'{8}','{9}','{10}')
                        """)\
                        .format(db,date.year,date.month,date.day,\
                        item["p_threat"],item["tstart"],item["tend"],item["duration"],item["clientip"],item["respcode"],item["respcodename"])

            ImpalaEngine.execute_query(insert_query)

        ImpalaEngine.execute_query(imp_query)
        response = "Timeline successfully saved"
    else:
        response = "Timeline couldn't be created"