def fetch_domains(): """ To fetch urls from the database, and return urls """ db = MySQL() sql = 'SELECT domain FROM domain_white_list' db.query(sql) urls = db.fetch_all_rows() return tuple_to_list(urls)
def get_resource_data(tb_name): """ 获得基础数据 :param tb_name:string 表名 :return: 返回基础数据 """ db = MySQL(DESTINATION_CONFIG) db.query('SELECT tld, SUM(whois_sum) AS count FROM domain_whois_%s WHERE flag <> "-6" GROUP BY tld' % tb_name) results = db.fetch_all_rows() return results
def update_day(): """更新表whois_sum_by_day,即统计数据库中最新的域名whois总量 """ print str(datetime.now()), '开始更新当天数据库中最新的WHOIS总量(whois_sum_by_day)' db = MySQL(DESTINATION_CONFIG) sql = 'INSERT INTO whois_sum_by_day(sum) SELECT MAX(tld_sum) FROM whois_sum \ WHERE to_days(insert_time) = to_days(now())' db.insert(sql) db.close() print str(datetime.now()), '结束更新当天数据库中最新的WHOIS总量(whois_sum_by_day)'
def update_table(tb_name): """ 更新表 :param tb_name: :return: """ sql = 'SELECT flag,sum(whois_sum) FROM domain_whois_%s GROUP BY flag' % tb_name.lower() flag_undetected = flag_no_svr = flag_no_connect = 0 flag_reg_info = flag_reg_date = flag_part_info = 0 db = MySQL(DESTINATION_CONFIG) db.query(sql) results = db.fetch_all_rows() for item in results: flag = item[0] whois_sum = item[1] if flag == '-6': flag_undetected += whois_sum if flag == '-5': flag_no_svr += whois_sum elif flag == '-1' or flag == '-2' or flag == '-3' or flag == '-4': flag_no_connect += whois_sum elif flag == '120' or flag == '121' or flag == '122': flag_reg_info += whois_sum elif flag == '110' or flag == '102' or flag == '112': flag_reg_date += whois_sum elif flag == '100' or flag == '101' or flag == '111': flag_part_info += whois_sum sql = 'INSERT INTO table_overall_history (table_name,flag_undetected,\ flag_no_svr,flag_no_connect,flag_reg_info,flag_reg_date,flag_part_info) \ VALUES ("%s","%s","%s","%s","%s","%s","%s")' % ('domain_whois_'+tb_name,flag_undetected,flag_no_svr,flag_no_connect,flag_reg_info,flag_reg_date,flag_part_info) db.insert(sql) db.close()
def update_db(): """ 更新数据库 """ svr_num = sum_all_list() db = MySQL(DESTINATION_CONFIG) db.truncate('TRUNCATE TABLE top_sec_svr') sql = 'INSERT INTO top_sec_svr(top_svr, sec_svr, whois_sum) VALUES("%s", "%s", "%s")' for top,sec,num in svr_num: db.insert_no_commit(sql % (top,sec,num)) db.commit() db.close()
def update_destination(tb_name, results): """ 更新数据库 :param results: :return: """ destination_db = MySQL(DESTINATION_CONFIG) destination_db.truncate('TRUNCATE TABLE domain_whois_%s' % tb_name) for item in results: destination_db.insert_no_commit('INSERT INTO domain_whois_%s (tld,flag,whois_sum) VALUES ("%s","%s","%s")' % ( tb_name, item[0], item[1], item[2])) destination_db.commit() destination_db.close()
def update_db(): """更新数据库""" tld_whois = sum_all_whois() total = 0 db = MySQL(DESTINATION_CONFIG) sql = 'INSERT INTO tld_whois_sum_history(tld,whois_sum) VALUES("%s", "%s")' # 插入数据 for tld,whois in tld_whois: db.insert_no_commit(sql % (tld,whois)) total += whois db.commit() sql = 'INSERT INTO whois_sum(tld_sum) VALUES("%s")' db.insert(sql % total) db.close()
def insert_db(): file_name = open('small_benign.txt') urls = file_name.readlines() db = MySQL() count = 0 for url in urls: sql = 'Insert into url_features(url,malicious)VALUES ("%s","0")'%(url.strip()) db.insert_no_commit(sql) count += 1 db.commit() db.close() print count
def update_top_sec_num(): """ 更新表msvr_ssvr,统计顶级服务器和二级服务器数量 """ print str(datetime.now()), '开始更新数据库中顶级和二级服务器数量(msvr_ssvr)' db = MySQL(DESTINATION_CONFIG) sql = 'UPDATE msvr_ssvr SET msvr_ssvr.ssvr=(SELECT COUNT(DISTINCT sec_svr) FROM top_sec_svr)' db.update(sql) sql = 'UPDATE msvr_ssvr SET msvr=(SELECT COUNT(DISTINCT addr) FROM whois_addr)' db.update(sql) db.close() print str(datetime.now()), '结束更新数据库中顶级和二级服务器数量(msvr_ssvr)'
def fetch_source_data(tb_name): """获取源数据 :param tb_name: string 表名 :return: results 数据 """ db = MySQL(DESTINATION_CONFIG) db.query('SELECT tld,flag,whois_sum FROM domain_whois_%s WHERE flag <> "-6" GROUP BY tld,flag' % tb_name) results = db.fetch_all_rows() db.close() return results
def fetch_resource_data(tb_name): """获得源数据 :param tb_name: string 表名 :return: results: 查询结果 """ db = MySQL(DESTINATION_CONFIG) db.query('SELECT tld, SUM(whois_sum) FROM domain_whois_%s GROUP BY tld' % tb_name) results = db.fetch_all_rows() db.close() return results
def get_data_from_source(tb_name): """ :param name:string 数据库名称 :return: results 数据库whois数量分布 """ source_db = MySQL(SOURCE_CONFIG) source_db.query('select tld,flag,count(*) as whois_sum from domain_whois_%s group by tld,flag' % (tb_name)) results = source_db.fetch_all_rows() source_db.close() return results
def process_id_to_user(process_id): """通过process_id获取用户的邮箱地址和主机地址""" sql = "SELECT email,host_ip,pid,code_route,process_name,warning_times FROM `process_info`,user_account,host_info WHERE process_id = '%s' AND user_account.user_id = process_info.user_id AND host_info.host_id = process_info.host_id" db = MySQL(SOURCE_CONFIG) db.query(sql % process_id) email_host = db.fetch_all_rows() email = email_host[0][0] host = base64.decodestring(email_host[0][1]) pid = email_host[0][2] code_route = email_host[0][3] process_name = email_host[0][4] warning_times = email_host[0][5] db.close() return email, host,pid,code_route,process_name,warning_times
def insert_domains(domain_set): """ 更新数据库中词汇特征 :param url_feature: :return: """ db = MySQL() for domain in domain_set: sql = 'Insert into domain_features(domain,tld,tld_token_count,domain_length,domain_characters,domain_digit)VALUES ("%s","%s","%s","%s","%s","%s")' \ %(domain['domain'],domain['tld'],domain['tld_token_count'],domain['domain_length'],domain['character'],domain['digit']) db.insert_no_commit(sql) db.commit() db.close()
def fetch_data(): """ 从数据库中获取域名(domain)基础数据 :return: 返回基础数据 """ db = MySQL() sql = 'SELECT domain FROM domain_features' db.query(sql) domains = db.fetch_all_rows() db.close() return domains
def fetch_data(): """ 从数据库中获取域名长度(domain_length)基础数据 :return: 返回基础数据 """ db = MySQL() sql = 'SELECT domain_characters FROM domain_features' db.query(sql) tlds = db.fetch_all_rows() db.close() return tlds
def fetch_data(): """ 从数据库中获取数据基础数据 :return: 返回基础数据 """ db = MySQL() sql = 'SELECT url_length,path_tokens,path_brand,domain_tokens,malicious,domain_characters,path_characters FROM url_features_copy_copy' db.query(sql) urls = db.fetch_all_rows() db.close() return urls
def fetch_data(): """ 从数据库中获取域名长度(domain_length)基础数据 :return: 返回基础数据 """ db = MySQL() sql = 'SELECT tld FROM domain_features WHERE tld_token_count = "2"' db.query(sql) tlds = db.fetch_all_rows() db.close() return tlds
def update_db(): """更新数据库""" db = MySQL(DESTINATION_CONFIG) truncate_sql = 'TRUNCATE TABLE tld_whois_flag' db.truncate(truncate_sql) sql = 'INSERT INTO tld_whois_flag(tld, flag,flag_detail, whois_sum) VALUES("%s", "%s", "%s", "%s")' tld_flag_num = sum_all_flags() for tld, flag_detail, num in tld_flag_num: try: if int(flag_detail) < 0: flag = '0' elif int(flag_detail) >= 120: flag = '1' elif flag_detail == '102' or flag_detail == '110' or flag_detail == '112': flag = '2' else: flag = '3' db.insert_no_commit(sql % (tld,flag,flag_detail,num)) except: continue db.commit() db.close()
def fetch_data(): """ 从数据库中获取域名长度(domain_length)基础数据 :return: 返回基础数据 """ db = MySQL() sql = 'SELECT tld FROM domain_features' sql = 'SELECT SUBSTRING_INDEX(tld,".",-1) as a FROM domain_features' db.query(sql) domains_length = db.fetch_all_rows() db.close() return domains_length
def update_table(tb_name): """ 更新表 :param tb_name: :return: """ sql = 'SELECT flag,sum(whois_sum) FROM domain_whois_%s GROUP BY flag' % tb_name.lower( ) flag_undetected = flag_no_svr = flag_no_connect = 0 flag_reg_info = flag_reg_date = flag_part_info = 0 db = MySQL(DESTINATION_CONFIG) db.query(sql) results = db.fetch_all_rows() for item in results: flag = item[0] whois_sum = item[1] if flag == '-6': flag_undetected += whois_sum if flag == '-5': flag_no_svr += whois_sum elif flag == '-1' or flag == '-2' or flag == '-3' or flag == '-4': flag_no_connect += whois_sum elif flag == '120' or flag == '121' or flag == '122': flag_reg_info += whois_sum elif flag == '110' or flag == '102' or flag == '112': flag_reg_date += whois_sum elif flag == '100' or flag == '101' or flag == '111': flag_part_info += whois_sum sql = 'INSERT INTO table_overall_history (table_name,flag_undetected,\ flag_no_svr,flag_no_connect,flag_reg_info,flag_reg_date,flag_part_info) \ VALUES ("%s","%s","%s","%s","%s","%s","%s")' % ( 'domain_whois_' + tb_name, flag_undetected, flag_no_svr, flag_no_connect, flag_reg_info, flag_reg_date, flag_part_info) db.insert(sql) db.close()
def update_url_features(url_feature): """ 更新数据库中词汇特征 :param url_feature: :return: """ db = MySQL() sql = 'UPDATE url_features SET url_length = "%s", domain="%s",domain_tokens="%s",domain_characters="%s",path="%s", ' \ 'path_tokens="%s",path_characters="%s",path_brand="%s" WHERE url="%s"' % \ (url_feature[1],url_feature[2],url_feature[3],url_feature[4],url_feature[5],url_feature[6], url_feature[7],url_feature[8],url_feature[0]) db.update(sql) db.close()
def get_source_data(tb_name): """ 得到基础数据 :param tb_name:string 表名 :return: """ db = MySQL(SOURCE_CONFIG) sql = 'SELECT top_whois_server as top_svr, sec_whois_server AS sec_svr, count(*) \ FROM domain_whois_%s WHERE sec_whois_server <> "" GROUP BY top_svr, sec_svr' % tb_name db.query(sql) results = db.fetch_all_rows() db.close() return results
def insert_host_error(error,process_ids): """更新主机错误""" db = MySQL(SOURCE_CONFIG) error_info = '主机异常:'+ str(error) status = '异常' sql = 'insert into process_status (process_id,cpu,mem,vsz,rss,status,error_info,log_size) values("%s","%s","%s","%s","%s","%s","%s","%s")' for _,process_id,_,_ in process_ids: db.insert(sql % (process_id, 0, 0, 0, 0, status, error_info,'-3')) # -3表示日志的主机错误 email, host, pid, code_route, code_name,warning_times = process_id_to_user(process_id) if warning_times > 0: content = (pid, host, status, error_info, code_route, code_name) if send_process_exception([email], content): print "邮件预警成功:" + email reduce_warning_times(process_id) else: print "邮件预警失败:" + email db.close()
def fetch_domains(): """ To fetch urls from the database, and return urls """ db1 = MySQL1() sql = 'SELECT url FROM benign_urls' db1.query(sql) urls1 = db1.fetch_all_rows() db2 = MySQL() sql = 'SELECT domain FROM domain_white_list' db2.query(sql) urls2 = db2.fetch_all_rows() db1.close() db2.close() urls1 = tuple_to_list(urls1) urls2 = tuple_to_list(urls2) urls = urls1 + urls2 print len(urls) urls = list(set(urls)) print len(urls) return urls
def fetch_domains(): """ To fetch urls from the database, and return urls """ db1 = MySQL1() sql = 'SELECT url FROM benign_urls' db1.query(sql) urls1 = db1.fetch_all_rows() db2 = MySQL() sql = 'SELECT domain FROM domain_white_list' db2.query(sql) urls2 = db2.fetch_all_rows() db1.close() db2.close() urls1 = tuple_to_list(urls1) urls2 = tuple_to_list(urls2) urls = urls1+urls2 print len(urls) urls = list(set(urls)) print len(urls) return urls
# print b.mean() # # mal_x = np.arange(len(mal_y)) # beg_x = np.arange(len(beg_y)) # fig = plt.figure() # fig.add_subplot(121) # plt.plot(beg_x,beg_y) # plt.xlabel(u'网址个数') # plt.ylabel(u"域名长度") # fig.add_subplot(122) # plt.plot(mal_x,mal_y,color='r') # plt.xlabel(u'网址个数') # plt.ylabel(u"域名长度") # plt.show() db = MySQL() mal_sql = 'select domain_tokens,domain_characters from url_features WHERE malicious="1"' db.query(mal_sql) mal = db.fetch_all_rows() mal_y = [] for j, i in mal: # print j,i mal_y.append(int(list(eval(i))[1] * list(eval(j))[1] / 100.0)) beg_sql = 'select domain_tokens,domain_characters from url_features WHERE malicious="0"' db.query(beg_sql) beg = db.fetch_all_rows() beg_y = [] for j, i in beg: beg_y.append(int(list(eval(i))[1] * list(eval(j))[1] / 100.0))
def fetch_domains(): """ To fetch urls from the database, and return urls """ tld = {} db1 = MySQL(DBCONFIG1) db2 = MySQL(DBCONFIG2) sql = 'SELECT SUBSTRING_INDEX(tld,".",-1) as a,count(*) FROM domain_features GROUP BY a' db1.query(sql) tld_count = db1.fetch_all_rows() for i in tld_count: print i if len(i[0]) == 0: continue sql = 'SELECT type FROM tld_details WHERE tld="%s"' % ('.' + i[0]) db2.query(sql) urls2 = db2.fetch_all_rows() try: if urls2[0][0] in tld.keys(): tld[urls2[0][0]] += i[1] else: tld[urls2[0][0]] = i[1] except: continue print tld db1.close() db2.close()
分析whois的ip地址的分布 """ from data_base import MySQL import numpy as np DBCONFIG = { 'host': '172.26.253.3', 'port': 3306, 'user': '******', 'passwd': 'platform', 'db': 'DomainWhois', 'charset': 'utf8' } db = MySQL(DBCONFIG) sql = 'select port_available from svr_ip WHERE level="2"' db.query(sql) test = db.fetch_all_rows() result = [] for i in test: result.append(len(i[0])) # print result
def reduce_warning_times(process_id): db = MySQL(SOURCE_CONFIG) sql = 'update process_info set warning_times = warning_times -1 where process_id = "%s"' % process_id db.update(sql) db.close()
def update_db(): """更新数据库""" tld_num = sum_all_domains() db = MySQL(DESTINATION_CONFIG) db.truncate('TRUNCATE TABLE domain_summary') update_sql = 'INSERT INTO domain_update(tld_name, domain_num) VALUES("%s", "%s")' summary_sql = 'INSERT INTO domain_summary(tld_name, domain_num) VALUES("%s", "%s")' for tld,num in tld_num: db.insert_no_commit(update_sql % (tld,num)) db.insert_no_commit(summary_sql % (tld,num)) db.commit() db.close()
def fetch_domains(): """ To fetch urls from the database, and return urls """ tld ={} db1 = MySQL(DBCONFIG1) db2 = MySQL(DBCONFIG2) sql = 'SELECT SUBSTRING_INDEX(tld,".",-1) as a,count(*) FROM domain_features GROUP BY a' db1.query(sql) tld_count = db1.fetch_all_rows() for i in tld_count: print i if len(i[0])==0: continue sql = 'SELECT type FROM tld_details WHERE tld="%s"' % ('.'+i[0]) db2.query(sql) urls2 = db2.fetch_all_rows() try: if urls2[0][0] in tld.keys(): tld[urls2[0][0]] += i[1] else: tld[urls2[0][0]] = i[1] except: continue print tld db1.close() db2.close()
# mal_x = np.arange(len(mal_y)) # beg_x = np.arange(len(beg_y)) # fig = plt.figure() # fig.add_subplot(121) # plt.plot(beg_x,beg_y) # plt.xlabel(u'网址个数') # plt.ylabel(u"域名长度") # fig.add_subplot(122) # plt.plot(mal_x,mal_y,color='r') # plt.xlabel(u'网址个数') # plt.ylabel(u"域名长度") # plt.show() db = MySQL() mal_sql = 'select domain_tokens,domain_characters from url_features WHERE malicious="1"' db.query(mal_sql) mal = db.fetch_all_rows() mal_y = [] for j,i in mal: # print j,i mal_y.append(int(list(eval(i))[1]*list(eval(j))[1]/100.0)) beg_sql = 'select domain_tokens,domain_characters from url_features WHERE malicious="0"' db.query(beg_sql) beg = db.fetch_all_rows() beg_y = [] for j,i in beg: beg_y.append(int(list(eval(i))[1]*list(eval(j))[1]/100.0))
""" from data_base import MySQL import numpy as np DBCONFIG = {'host':'172.26.253.3', 'port': 3306, 'user':'******', 'passwd':'platform', 'db':'DomainWhois', 'charset':'utf8'} db = MySQL(DBCONFIG) sql = 'select port_available from svr_ip WHERE level="2"' db.query(sql) test = db.fetch_all_rows() result = [] for i in test: result.append(len(i[0]))
def merge_same_rc(domain): """ 最简单的合并相同的记录 todo: 1. 以某时间窗口来进行合并,去掉因为探测导致的问题 """ domain_rc = [] rc_last_updated = [] db = MySQL(SOURCE_CONFIG) tb_name = domain2tb(domain) sql = 'select ips, cnames, ns,last_updated from ' + tb_name + ' WHERE domain="%s"' db.query(sql % domain) rc = db.fetch_all_rows() # 域名记录预处理 for i in rc: domain_rc.append([i[0], i[1], i[2]]) rc_last_updated.append(str(i[3])) # 遍历修改记录 for i in range(0, len(domain_rc) - 1): if sorted(domain_rc[i][0]) == sorted(domain_rc[i+1][0]) and sorted(domain_rc[i][1])==sorted(domain_rc[i+1][1]) \ and sorted(domain_rc[i][2])==sorted(domain_rc[i+1][2]): # 前后两次的记录是否一致 ## 若相邻一致的情况 # 先删除后者记录 delete_sql = 'delete from ' + tb_name + ' WHERE domain="%s" and last_updated="%s"' db.update(delete_sql % (domain, rc_last_updated[i + 1])) # 再将前者记录的last_updated时间修改为后者记录的时间 update_sql = 'update ' + tb_name + ' set last_updated="%s" WHERE domain="%s" AND last_updated="%s"' db.update(update_sql % (rc_last_updated[i + 1], domain, rc_last_updated[i])) else: pass db.close()