Python pandas_normal примеры, fileTraff.read_file.pandas_normal Python примеры использования

Пример #1

0

Показать файл

def get_file_conn():
    '''
    Return the distinct connection records found in the conn log.

    Loads /usr/local/data/conn.log through read_file.pandas_normal, keeps
    columns 0, 2, 4, 6, 7, 8, 16, 17, 18 and 19, discards rows with any
    missing value and emits one entry per distinct combination of values.

    Returns an empty DataFrame when the log file does not exist. Otherwise
    returns a list of 10-item lists laid out (names follow the original
    code) as [time, ori_ip, resp_ip, proto, service, duration, orig_pkts,
    orig_bytes, resp_pkts, resp_bytes], where time is column 0 rendered as
    a local-time string.
    '''
    log_path = '/usr/local/data/conn.log'
    if not os.path.exists(log_path):
        return pd.DataFrame()

    wanted = [0, 2, 4, 6, 7, 8, 16, 17, 18, 19]
    frame = read_file.pandas_normal(log_path)
    complete_rows = frame.iloc[:, wanted].dropna(axis=0)
    # Grouping on every selected column collapses duplicate rows; each group
    # key is the tuple of the ten column values.
    grouped = complete_rows.groupby(wanted)

    records = []
    for key in grouped.groups:
        epoch_seconds = int(float(key[0]))
        stamp = time.strftime("%Y--%m--%d %H:%M:%S",
                              time.localtime(epoch_seconds))
        records.append([stamp] + list(key[1:10]))
    return records

Пример #2

0

Показать файл

Файл: weird.py Проект: chensheng1/IDS_detection

def get_file_anomaly():
    '''
    Return the distinct records found in the weird log.

    Loads C:\\Users\\qq\\Desktop\\weird.log through read_file.pandas_normal,
    keeps columns 0, 2, 3, 4, 5 and 6, discards rows with any missing value
    and emits one entry per distinct combination of values.

    Returns an empty DataFrame when the log file does not exist. Otherwise
    returns a list of 6-item lists laid out (names follow the original code)
    as [time, ori_ip, ori_k, resp_ip, resp_k, ana_be]; time is column 0
    rendered as a local-time string and the two *_k fields are cast to int.
    '''
    log_path = 'C:\\Users\\qq\\Desktop\\weird.log'
    if not os.path.exists(log_path):
        return pd.DataFrame()

    wanted = [0, 2, 3, 4, 5, 6]
    frame = read_file.pandas_normal(log_path)
    complete_rows = frame.iloc[:, wanted].dropna(axis=0)
    # Grouping on every selected column leaves one key per distinct row.
    grouped = complete_rows.groupby(wanted)

    records = []
    for key in grouped.groups:
        epoch_seconds = int(float(key[0]))
        stamp = time.strftime("%Y--%m--%d %H:%M:%S",
                              time.localtime(epoch_seconds))
        records.append(
            [stamp, key[1], int(key[2]), key[3], int(key[4]), key[5]])
    return records

Пример #3

0

Показать файл

def mac_analysis_test():
    '''
    Match devices seen in the conn log against the MAC vendor table.

    Rows whose column 2 matches the pattern from db_config.get_pattern() are
    kept. Every distinct (column 2, column 22) pair -- "ip, resp_mac"
    according to the original comment -- is looked up in the mapping
    returned by get_mac_table(), keyed by the first three octets of the MAC
    address joined with '-' and upper-cased.

    Returns an empty DataFrame when the log file does not exist; otherwise a
    DataFrame with columns user, mac, pingpai holding one row per pair whose
    prefix is present in the table.
    '''
    pattern = db_config.get_pattern()
    log_path = path_config.get_conn_file_path()
    if not os.path.exists(log_path):
        return pd.DataFrame()

    frame = read_file.pandas_normal(log_path)
    # fillna(False) drops rows where column 2 is missing instead of letting a
    # NaN leak into the boolean mask.
    frame = frame[frame.iloc[:, 2].str.match(pattern).fillna(False)]
    pairs = frame.iloc[:, [2, 22]].dropna(axis=0)

    vendor_by_prefix = get_mac_table()
    rows = []
    for ip, mac_addr in pairs.groupby([2, 22]).groups:
        # Characters 0-1, 3-4 and 6-7 are the three leading octets; the
        # characters in between are the separators of the logged address.
        prefix = '-'.join(
            (mac_addr[0:2], mac_addr[3:5], mac_addr[6:8])).upper()
        # Plain membership is a hash lookup and works on Python 2 and 3,
        # unlike scanning dict.iterkeys().
        if prefix in vendor_by_prefix:
            rows.append([ip, mac_addr, vendor_by_prefix[prefix]])
    return pd.DataFrame(rows, columns=['user', 'mac', 'pingpai'])

Пример #4

0

Показать файл

Файл: web.py Проект: chensheng1/IDS_detection

def get_logdata_df():
    '''
    Collect HTTP log entries whose host belongs to a site known to the DB.

    Rows of /usr/local/data/http.log whose column 2 matches the pattern from
    db_config.get_pattern() are kept, the last row is dropped, and columns
    0, 2 and 8 ("ts, origIP, host" per the original comment) are grouped
    into distinct triples. The site label of each host is looked up in the
    mapping returned by get_data_base_host().

    Returns an empty DataFrame when the log file does not exist; otherwise a
    list (not a DataFrame) of [column 0, column 2, db_host[site]] entries.
    '''
    pattern = db_config.get_pattern()  # depends on the network environment
    log_path = '/usr/local/data/http.log'
    if not os.path.exists(log_path):
        return pd.DataFrame()

    frame = read_file.pandas_normal(log_path)
    # fillna(False): a row with a missing column 2 yields NaN in the mask,
    # which pandas refuses to index with.
    frame = frame[frame.iloc[:, 2].str.match(pattern).fillna(False)]
    frame = frame.iloc[:-1, [0, 2, 8]].dropna(how='any')
    grouped = frame.groupby([0, 2, 8])
    db_host = get_data_base_host()  # site hosts recorded in the database

    generic_labels = ('com', 'cn', 'net', 'gov', 'org')
    np_list = []
    for ts, orig_ip, host in grouped.groups:
        labels = host.split('.')
        if len(labels) < 2:
            # A single-label host can never match a site key.
            continue
        if labels[-2] in generic_labels:
            # e.g. "www.example.com.cn": the site label sits one step
            # further left. Two-label hosts such as "com.cn" have no such
            # label and used to raise IndexError.
            if len(labels) < 3:
                continue
            site = labels[-3]
        else:
            site = labels[-2]
        if site in db_host:
            # NOTE(review): the original appended the same entry twice
            # (next to unused per-group min/max timestamp calculations,
            # removed here as dead code). The doubling is preserved because
            # callers may rely on it -- confirm whether it is intentional.
            np_list.append([ts, orig_ip, db_host[site]])
            np_list.append([ts, orig_ip, db_host[site]])
    return np_list

Пример #5

0

Показать файл

Файл: main_temp.py Проект: chensheng1/IDS_detection

def read_file_now(filename):
    '''
    Wait until a file exists, then load it.

    Checks for the file every 0.5 seconds and, once it is present, passes
    the path to read_file.pandas_normal. Never returns if the file never
    appears.

    @param filename: full path of the file to read
    @return: whatever read_file.pandas_normal returns for that path
    '''
    while True:
        if os.path.exists(filename):
            return read_file.pandas_normal(filename)
        time.sleep(0.5)

Пример #6

0

Показать файл

Файл: main_temp.py Проект: LucasEcho/hubu1411

def read_file_now(filename):
    '''
    Block until the given file shows up and return its parsed content.

    The path is polled at 0.5 second intervals; as soon as it exists it is
    handed to read_file.pandas_normal. There is no timeout.

    @param filename: full path of the file to read
    @return: the result of read_file.pandas_normal(filename)
    '''
    while True:
        if os.path.exists(filename):
            return read_file.pandas_normal(filename)
        time.sleep(0.5)

Пример #7

0

Показать файл

Файл: browser.py Проект: chensheng1/IDS_detection

def get_usagent_content():
    '''
    Classify the browsers seen in the HTTP log by user-agent substring.

    Loads /usr/local/data/http.log through read_file.pandas_normal, keeps
    columns 0, 2 and 12 (used below as time, ip and user-agent), drops rows
    with missing values and walks the distinct triples. User-agents of 28
    characters or fewer, and ones matching no rule, are ignored.

    Returns an empty DataFrame when the log file does not exist; otherwise a
    list of [time, ip, browser name] entries.
    '''
    log_path = '/usr/local/data/http.log'
    if not os.path.exists(log_path):
        return pd.DataFrame()

    # Ordered rules: (browser name, alternatives). A rule matches when, for
    # at least one alternative, every token of it occurs in the user-agent.
    # The first matching rule wins.
    rules = (
        ('liebao', (('LBBrowser',), ('LBBROWSER ',))),
        ('QQBrowser', (('QQBrowser',),)),
        ('Avant', (('Avant Browser',),)),
        ('UCbrowser', (('UCBrowser',), ('UCWEB',))),
        ('Maxthon', (('Maxthon',),)),
        ('Tencent TT', (('TencentTraveler 4.0',),)),
        ('sogou browser', (('sogoumobilebrowser',),)),
        ('360Browser', (('360SE',), ('360browser',))),
        ('Firefox', (('Firefox',), ('firefox',))),
        ('Chrome', (('Chrome', 'Safari'),)),
        ('Safari', (('MobileSafari',),)),
        # Lower-case "macintosh" alone is enough; "Macintosh" additionally
        # needs "Safari" (this mirrors the original or/and precedence).
        ('Safari', (('macintosh',), ('Macintosh', 'Safari'))),
        ('Opera', (('Opera',),)),
        ('Internet Explorer', (('MSIE',), ('mise',))),
        ('2345Browser', (('Mb2345Browser',),)),
        ('Silk', (('Silk',),)),
        ('BaiDuBrowser', (('baidubrowser',),)),
        ('YaBrowser', (('YaBrowser',),)),
    )

    def browser_name(agent):
        # Name of the first rule matching the agent, or None.
        for name, alternatives in rules:
            for required in alternatives:
                if all(token in agent for token in required):
                    return name
        return None

    frame = read_file.pandas_normal(log_path)
    complete_rows = frame.iloc[:, [0, 2, 12]].dropna(axis=0)
    grouped = complete_rows.groupby([0, 2, 12])

    browser = []
    for stamp, ip, agent in grouped.groups:
        if len(agent) <= 28:
            # Too short to be a regular browser user-agent.
            continue
        name = browser_name(agent)
        if name is not None:
            browser.append([stamp, ip, name])
    return browser

Пример #8

0

Показать файл

Файл: test.py Проект: LucasEcho/hubu1411

def yes_no():
    '''
    Load http.log and return two of its columns without incomplete rows.

    The file is read with read_file.pandas_normal from the current working
    directory. The column labelled 8 is renamed to 'host'; then every row
    except the last is taken at column positions 1 and 8, and rows with any
    missing value are removed.

    @return: DataFrame with the two selected columns
    '''
    frame = read_file.pandas_normal('http.log')
    frame.rename(columns={8: 'host'}, inplace=True)
    # iloc is positional, so the selection is unaffected by the rename above.
    selected = frame.iloc[:-1, [1, 8]]
    return selected.dropna(how='any')

Пример #9

0

Показать файл

def get_yc_sub():  # "white gray" per the original note
    '''
    Return the distinct non-null values of column 2 of the weird log.

    Loads /usr/local/data/weird.log through read_file.pandas_normal.

    Returns an empty DataFrame when the log file does not exist; otherwise a
    list with one single-item list per group key of column 2 (the original
    treats these values as ip addresses).
    '''
    log_path = '/usr/local/data/weird.log'
    if not os.path.exists(log_path):
        return pd.DataFrame()

    frame = read_file.pandas_normal(log_path)
    column = frame.iloc[:, [2]].dropna(axis=0)
    # One group per distinct value; wrap each key in its own list.
    return [[key] for key in column.groupby([2]).groups]

Пример #10

0

Показать файл

Файл: weird_temp.py Проект: chensheng1/IDS_detection

def get_yc_content():
    '''
    Return the distinct (column 0, column 2, column 4) triples of weird.log.

    Loads C:\\Users\\qq\\Desktop\\weird.log through read_file.pandas_normal
    and ignores rows where any of the three columns is missing.

    Returns an empty DataFrame when the log file does not exist; otherwise a
    list of [time, ori_ip, res_ip] lists (names follow the original code).
    '''
    log_path = 'C:\\Users\\qq\\Desktop\\weird.log'
    if not os.path.exists(log_path):
        return pd.DataFrame()

    frame = read_file.pandas_normal(log_path)
    complete_rows = frame.iloc[:, [0, 2, 4]].dropna(axis=0)
    grouped = complete_rows.groupby([0, 2, 4])
    # Each group key is one distinct triple of column values.
    return [[key[0], key[1], key[2]] for key in grouped.groups]

Пример #11

0

Показать файл

Файл: protocol_rt.py Проект: chensheng1/IDS_detection

def get_data():
    '''
    Sum the byte counters of the current conn log per service.

    The log is file_path.current_log_file_path + 'conn.log'. Every row
    except the last is taken at column positions 7, 17 and 19, rows with
    missing values are dropped, the columns are renamed to service,
    orig_bytes and resp_bytes, and the frame is summed per service.

    Returns 0 when the log file does not exist; otherwise the grouped
    DataFrame indexed by service.
    '''
    log_path = file_path.current_log_file_path + 'conn.log'
    if not os.path.exists(log_path):
        return 0

    frame = read_file.pandas_normal(log_path)
    traffic = frame.iloc[:-1, [7, 17, 19]].dropna(how='any')
    traffic.rename(
        columns={7: 'service', 17: 'orig_bytes', 19: 'resp_bytes'},
        inplace=True)
    return traffic.groupby('service').sum()

Пример #12

0

Показать файл

def get_ip():
    '''
    Return the distinct (column 0, column 2, column 4) triples of conn.log.

    Loads C:\\Users\\qq\\Desktop\\conn.log through read_file.pandas_normal
    and ignores rows where any of the three columns is missing.

    Returns an empty DataFrame when the log file does not exist; otherwise a
    numpy array built from one 3-item list per distinct triple.
    '''
    log_path = 'C:\\Users\\qq\\Desktop\\conn.log'
    if not os.path.exists(log_path):
        return pd.DataFrame()

    frame = read_file.pandas_normal(log_path)
    complete_rows = frame.iloc[:, [0, 2, 4]].dropna(axis=0)
    grouped = complete_rows.groupby([0, 2, 4])
    # Group keys are the distinct value tuples; turn each into a list row.
    return np.array([list(key) for key in grouped.groups])

Пример #13

0

Показать файл

def get_yc_content():
    '''
    Return the distinct value combinations of seven conn.log columns.

    Loads C:\\Users\\qq\\Desktop\\conn.log through read_file.pandas_normal,
    keeps columns 8, 9, 10, 16, 17, 18 and 19 and ignores rows where any of
    them is missing.

    Returns an empty DataFrame when the log file does not exist; otherwise a
    numpy array built from one 7-item list per distinct combination.
    '''
    log_path = 'C:\\Users\\qq\\Desktop\\conn.log'
    if not os.path.exists(log_path):
        return pd.DataFrame()

    wanted = [8, 9, 10, 16, 17, 18, 19]
    frame = read_file.pandas_normal(log_path)
    complete_rows = frame.iloc[:, wanted].dropna(axis=0)
    grouped = complete_rows.groupby(wanted)
    # Group keys are the distinct value tuples; turn each into a list row.
    return np.array([list(key) for key in grouped.groups])

Пример #14

0

Показать файл

Файл: respIpTop_u.py Проект: LucasEcho/hubu1411

def get_data():
    '''
    Rank responder addresses by the number of distinct originators.

    Reads test_file/conn.log through read_file.pandas_normal and renames
    columns 4, 2 and 6 to respIp, origIp and protocol. Every row except the
    last is considered; rows whose protocol is 'icmp' or whose origIp does
    not start with "10.185." are discarded. Duplicate (respIp, origIp)
    pairs are removed and the remaining rows are counted per respIp.

    The intermediate frames are printed to stdout, as before.

    @return: DataFrame indexed by respIp with the origIp count, limited to
             the 10 largest counts in descending order
    '''
    df_conn = read_file.pandas_normal('test_file/conn.log')
    df_conn.rename(
        columns={4: 'respIp', 2: 'origIp', 6: 'protocol'}, inplace=True)

    df_conn_useful = df_conn.iloc[:-1, [4, 2, 6]]
    not_icmp = df_conn_useful.protocol != 'icmp'
    df_conn_useful = df_conn_useful[not_icmp].iloc[:, [0, 1]]

    # Raw string keeps the regex escapes intact; fillna(False) drops rows
    # with a missing origIp instead of putting NaN into the boolean mask.
    from_subnet = df_conn_useful.iloc[:, 1].str.match(
        r'10\.185\.').fillna(False)
    df_conn_useful = df_conn_useful[from_subnet]
    # print(x) with a single argument behaves the same on Python 2 and 3.
    print(df_conn_useful)

    # Reassign rather than mutate in place: the frame is a slice of df_conn.
    df_conn_useful = df_conn_useful.drop_duplicates(['respIp', 'origIp'])
    df_all = df_conn_useful.groupby(['respIp']).count()
    print(df_all)

    df_results = df_all.sort_values(by='origIp', ascending=False).head(10)
    print(df_results)
    return df_results

Пример #15

0

Показать файл

def get_yc_content():  # "black" per the original note
    '''
    List column-2 values of weird.log whose column 6 starts with
    'connection'.

    Loads /usr/local/data/weird.log through read_file.pandas_normal, drops
    rows where column 2 or column 6 is missing and inspects each distinct
    (column 2, column 6) pair.

    Returns an empty DataFrame when the log file does not exist; otherwise a
    list with one single-item [ip] list per matching pair (the original
    names the column-2 value "ip").
    '''
    log_path = '/usr/local/data/weird.log'
    if not os.path.exists(log_path):
        return pd.DataFrame()

    frame = read_file.pandas_normal(log_path)
    complete_rows = frame.iloc[:, [2, 6]].dropna(axis=0)
    grouped = complete_rows.groupby([2, 6])
    return [[key[0]] for key in grouped.groups
            if key[1].startswith('connection')]

Пример #16

0

Показать файл

Файл: app.py Проект: chensheng1/IDS_detection

# User-agent prefixes of ordinary browsers / runtimes; these are not apps.
_APP_IGNORED_PREFIXES = ('Mozilla', 'Dalvik', 'Safari', 'Opera')

# Ordered (user-agent prefixes, reported app name) rules; first match wins.
_APP_PREFIX_RULES = (
    (('Youku', 'youku-tudou'), 'Youku'),
    (('ZhuiShuShenQi',), 'ZhuiShuShenQi'),
    (('Zeus',), 'Zeus'),
    (('YYMobile',), 'YYMobile'),
    (('ynote',), 'ynote'),  # Youdao notes
    (('youdao_dict',), 'youdao_dict'),  # Youdao dictionary
    (('xiami',), 'xiami'),  # Xiami
    (('Xunlei',), 'xunlei'),
    (('Xfplay',), 'Xfplay'),
    (('wpsoffice',), 'wpsoffice'),
    (('wifikey', 'wfbl'), 'wifikey'),
    (('WeRead',), 'WeRead'),
    (('Weibo', 'SinaNews'), 'Weibo'),
    (('WeChat', 'MicroMessenger'), 'WeChat'),
    (('Valve',), 'Valve'),
    (('UC/',), 'UC'),
    (('Tga', 'TGA'), 'Tga'),
    (('ttplayer',), 'ttplayer'),
    (('ting',), 'ting'),  # Baidu music
    (('TBClient', 'Taobao'), 'Taobao'),
    (('SOHUVideoHD', 'sohuiPadVideo'), 'SOHUVideo'),
    (('ShanbayWords',), 'ShanbayWords'),
    (('sogou_ime',), 'sogou'),
    (('smoba',), 'wzry'),
    (('SohuNews',), 'SohuNews'),
    (('Qzone', 'qzone', 'android-qzone'), 'Qzone'),
    (('QYPlayer', 'QIYIVideo', 'iQiYiPhoneVideo'), 'QYPlayer'),  # iQiyi
    (('qqlive',), 'qqlive'),
    (('QQGame',), 'QQGame'),
    (('QQMusic', 'ANDROIDQQMUSIC'), 'QQMusic'),
    (('qqpy', 'qqppim'), 'qqpy'),
    (('PandaReader',), 'PandaReader'),
    (('PPStream',), 'PPStream'),
    (('PandaTV', 'pandatv', 'pandaTV'), 'PandaTV'),
    (('News',), 'News'),
    (('NeteaseMusic',), 'NeteaseMusic'),
    (('netdisk',), 'netdisk'),
    (('MQQBrowser', 'QQBrowser'), 'MQQBrowser'),
    (('MiuiMusic',), 'MiuiMusic'),
    (('Miaopai',), 'Miaopai'),
    (('MobileMap',), 'MobileMap'),
    (('Mogujie',), 'Mogujie'),
    (('MONO',), 'MONO'),
    (('MomoChat',), 'Momo'),
    (('MGTV',), 'MGTV'),
    (('live',), 'live'),
    (('LOFTER',), 'LOFTER'),
    (('Letv',), 'Letv'),
    (('kugou/',), 'kugou'),
    (('kwai',), 'kwai'),
    (('hearthstone',), 'hearthstone'),
    (('Jdipad', 'jdapp', 'JD'), 'JD'),
    (('Jike',), 'Jike'),
    (('iFLYCloud',), 'iFLYCloud'),
    (('IPadQQ', 'QQ/', 'QQClient', 'QQ D'), 'QQ'),
    (('HiSpace', 'hispace'), 'HiSpace'),
    (('HunanTV', 'MGTV'), 'MGTV'),
    (('Flipboard',), 'Flipboard'),
    (('Fanli',), 'Fanli'),
    (('Funshion',), 'Funshion'),
    (('fenghuangdiantai',), 'fenghuangdiantai'),
    (('esbook',), 'esbook'),
    (('DYZB',), 'DYZB'),
    (('ComicReader',), 'ComicReader'),
    (('Changba',), 'Changba'),
    (('Cupid',), 'Cupid'),
    (('BaiduHD',), 'BaiduHD'),
    (('bdtb',), 'bdtb'),
    (('bukaios',), 'bukaios'),
    (('Blued',), 'Blued'),
    (('BtcTrade',), 'BtcTrade'),
    (('Android QQMail ',), 'QQMail '),
    (('AMAP',), 'gd MAP'),
    (('AiMeiTuan',), 'AiMeiTuan'),
    (('alipay', 'Alipay'), 'alipay'),
    (('AppStore',), 'AppStore'),
    (('360freewifi',), '360freewifi'),
)

# Fallback rules for agents matching no prefix: the agent must be longer
# than the given length and contain the token at an index greater than 0.
_APP_SUBSTRING_RULES = (
    (50, 'qqnews', 'QQ'),
    (25, '_weibo_', 'Weibo'),
    (58, 'ANDROID_TB', 'Taobao'),
)


def _classify_app_user_agent(user_agent):
    '''Return the app name for a user-agent string, or None if unknown.'''
    if user_agent.startswith(_APP_IGNORED_PREFIXES):
        return None
    for prefixes, label in _APP_PREFIX_RULES:
        if user_agent.startswith(prefixes):
            # netdisk desktop clients are not counted. The original tested
            # the truthiness of str.find(), which is -1 (true) when the
            # token is absent, so no netdisk agent was ever recorded.
            if label == 'netdisk' and 'PC-Windows' in user_agent:
                return None
            return label
    for min_length, token, label in _APP_SUBSTRING_RULES:
        if len(user_agent) > min_length and user_agent.find(token) > 0:
            return label
    return None


def get_usagent_content():
    '''
    Identify the apps seen in the HTTP log from their user-agent strings.

    Loads /usr/local/data/http.log through read_file.pandas_normal, keeps
    columns 0, 2 and 12 (used below as time, ip and user-agent), drops rows
    with missing values and classifies each distinct triple. User-agents of
    5 characters or fewer, browser user-agents and unrecognised ones are
    skipped.

    Returns an empty DataFrame when the log file does not exist; otherwise a
    list of [time, ip, app name] entries.
    '''
    log_path = '/usr/local/data/http.log'
    if not os.path.exists(log_path):
        return pd.DataFrame()

    frame = read_file.pandas_normal(log_path)
    complete_rows = frame.iloc[:, [0, 2, 12]].dropna(axis=0)
    grouped = complete_rows.groupby([0, 2, 12])

    app_use = []
    for stamp, ip, app_name in grouped.groups:
        if len(app_name) <= 5:
            continue
        label = _classify_app_user_agent(app_name)
        if label is not None:
            app_use.append([stamp, ip, label])
    return app_use

Пример #17

0

Показать файл

# Architecture tokens that mark an X11 user-agent comment as a Linux host.
_UA_LINUX_ARCH_TOKENS = ('AMD64', 'amd64', 'ARM', 'arm', 'MIPS', 'mips',
                         'ppc', 'sparc64', 'i586', 'i686', 'x64', 'x86',
                         'x86_64')

# Leading tokens of user-agents that carry a parenthesised platform comment.
_UA_COMMON_PREFIXES = ('Mozilla', 'Safari', 'Opera', 'Dalvik', 'User-Agent')


def _ua_windows_device(info):
    '''Return the Windows flavour named in a platform comment, or None.'''
    # Checked first: Windows Phone comments also start with "Windows" or
    # "compatible", so the original dedicated branches were never reached.
    if 'Windows Phone' in info:
        return 'Windows Phone'
    start = info.find('Windows NT')
    # str.find() returns -1 when absent and 0 for a match at the start, so
    # its result must be compared explicitly. The original truthiness test
    # reported comments that begin with "Windows NT" as "Windows 98".
    if start >= 0:
        end = info.find(';', start)
        if end < 0:
            return info[start:]
        return info[start:end]
    if 'Windows 98' in info:
        return 'Windows 98'
    return None


def _ua_apple_mobile_device(info, family, window):
    '''Return "<family> <os version>" for an iPhone/iPad comment, or None.

    The version is cut out around the first underscore; a second underscore
    is searched only within `window` characters after the first one.
    '''
    first = info.find('_')
    second = info.find('_', first + 1, first + window)
    if second > 0 and first > 0:
        return family + ' ' + info[first - 2:second + 2]
    if second < 0:
        return family + ' ' + info[first - 2:first + 2]
    return None


def _ua_android_device(info):
    '''Return the device model from a "Linux; ... Android ..." comment.'''
    if len(info) <= 28:
        return None
    fields = info.split(';')
    # The model is taken from the last ';' field unless that one is short,
    # in which case the field before it is used.
    chosen = fields[-1] if len(fields[-1]) > 9 else fields[-2]
    words = chosen.split(' ')[:-1]
    if 2 <= len(words) <= 4:
        # words[0] is skipped, as in the original slicing.
        return ' '.join(words[1:])
    return None


def _ua_device_from_comment(info):
    '''Map a platform comment to a (device, system) pair, or None.'''
    if info.startswith('X11'):
        # Membership test instead of the truthiness of str.find(), which
        # made this condition true for virtually every X11 comment.
        if any(token in info for token in _UA_LINUX_ARCH_TOKENS):
            return ('Linux', 'Linux')
        return None
    if info.startswith(('Windows', 'compatible')):
        device = _ua_windows_device(info)
        if device is None:
            return None
        return (device, 'Windows')
    if info.startswith(('x86_64', 'Macintosh')):
        return ('Mac', 'IOS')
    if info.startswith('iPhone'):
        device = _ua_apple_mobile_device(info, 'iPhone', 3)
        if device is None:
            return None
        return (device, 'IOS')
    if info.startswith('iPad'):
        device = _ua_apple_mobile_device(info, 'iPad', 5)
        if device is None:
            return None
        return (device, 'IOS')
    if info.startswith('Linux') and info.find('Android') > 0:
        device = _ua_android_device(info)
        if device is None:
            return None
        return (device, 'Android')
    return None


def get_usagent_content():
    '''
    Derive the device type of each client from HTTP user-agent strings.

    Loads /usr/local/data/http.log through read_file.pandas_normal, keeps
    columns 0, 2 and 12 (used below as time, ip and user-agent), drops rows
    with missing values and inspects each distinct triple. Only user-agents
    longer than 28 characters that start with a common browser token and
    contain a parenthesised comment are analysed; the text between the
    first '(' and the following ')' decides the device.

    Returns an empty DataFrame when the log file does not exist; otherwise a
    list of [time, ip, device, system] entries, where time is column 0
    rendered as a local-time string.
    '''
    log_path = '/usr/local/data/http.log'
    if not os.path.exists(log_path):
        return pd.DataFrame()

    frame = read_file.pandas_normal(log_path)
    complete_rows = frame.iloc[:, [0, 2, 12]].dropna(axis=0)
    grouped = complete_rows.groupby([0, 2, 12])

    device_type_result = []
    for stamp, ip, usagent in grouped.groups:
        time_da = time.strftime("%Y--%m--%d %H:%M:%S",
                                time.localtime(int(float(stamp))))
        if len(usagent) <= 28:
            continue
        if not usagent.startswith(_UA_COMMON_PREFIXES):
            continue  # not a common user-agent
        if usagent.find('(') <= 0:
            continue  # no parenthesised comment
        comment = usagent.split('(')[1].split(')')[0]
        device = _ua_device_from_comment(comment)
        if device is not None:
            device_type_result.append([time_da, ip, device[0], device[1]])
    return device_type_result

Python pandas_normal примеры использования