def get_file_conn():
    """Return the distinct connection rows read from conn.log.

    Columns 0, 2, 4, 6, 7, 8, 16, 17, 18 and 19 of the log are kept, rows
    with any missing value are dropped, and the remaining rows are
    de-duplicated by grouping on all selected columns.

    Returns:
        A list of 10-element lists. The first element is column 0 rendered
        with ``"%Y--%m--%d %H:%M:%S"`` in local time; the other nine are the
        raw values of the remaining columns, in column order.
        NOTE: when the log file does not exist an empty ``pd.DataFrame`` is
        returned instead of a list.
    """
    conn_log = '/usr/local/data/conn.log'
    if not os.path.exists(conn_log):
        return pd.DataFrame()

    wanted = [0, 2, 4, 6, 7, 8, 16, 17, 18, 19]
    frame = read_file.pandas_normal(conn_log)
    # Select the wanted columns and discard incomplete rows.
    rows = frame.iloc[:, wanted].dropna(axis=0)
    # Grouping on every selected column leaves one group key per distinct row.
    distinct = rows.groupby(wanted)

    records = []
    for key in distinct.groups:
        stamp = time.strftime("%Y--%m--%d %H:%M:%S",
                              time.localtime(int(float(key[0]))))
        records.append([stamp] + list(key[1:10]))
    return records
def get_file_anomaly(file_path_all='C:\\Users\\qq\\Desktop\\weird.log'):
    """Return the distinct anomaly rows read from a weird.log file.

    Columns 0, 2, 3, 4, 5 and 6 are kept, rows with any missing value are
    dropped, and the remaining rows are de-duplicated by grouping on all
    selected columns.

    Args:
        file_path_all: Path of the log to read. Defaults to the location that
            used to be hard-coded (a developer desktop path), so existing
            callers are unaffected; pass the real path on other machines.

    Returns:
        A list of ``[time, col2, int(col3), col4, int(col5), col6]`` lists,
        where ``time`` is column 0 rendered with ``"%Y--%m--%d %H:%M:%S"`` in
        local time. The original code names these values ori_ip, ori_k,
        resp_ip, resp_k and ana_be.
        NOTE: when the file does not exist an empty ``pd.DataFrame`` is
        returned instead of a list.
    """
    if not os.path.exists(file_path_all):
        return pd.DataFrame()

    wanted = [0, 2, 3, 4, 5, 6]
    df = read_file.pandas_normal(file_path_all)
    files_content = df.iloc[:, wanted].dropna(axis=0)
    # One group key per distinct row.
    result = files_content.groupby(wanted)

    list_result = []
    for ts, ori_ip, ori_k, resp_ip, resp_k, ana_be in result.groups:
        time_da = time.strftime("%Y--%m--%d %H:%M:%S",
                                time.localtime(int(float(ts))))
        list_result.append(
            [time_da, ori_ip, int(ori_k), resp_ip, int(resp_k), ana_be])
    return list_result
def mac_analysis_test():
    """Map each (ip, mac) pair seen in the conn log to a vendor name.

    Rows whose column 2 matches ``db_config.get_pattern()`` are kept, then the
    distinct (column 2, column 22) pairs are looked up in the table returned
    by ``get_mac_table()`` using the first three octets of the MAC address,
    formatted as upper-case ``XX-XX-XX``. Pairs with an unknown prefix are
    skipped.

    Returns:
        A ``pd.DataFrame`` with columns ``user``, ``mac`` and ``pingpai``
        (empty, without those columns, when the log file does not exist).
    """
    pattern = db_config.get_pattern()
    file_path_all = path_config.get_conn_file_path()
    if not os.path.exists(file_path_all):
        return pd.DataFrame()

    df = read_file.pandas_normal(file_path_all)
    # Treat rows where the match result is NaN as non-matching.
    df = df[df.iloc[:, 2].str.match(pattern).fillna(False)]
    mac_device = df.iloc[:, [2, 22]].dropna(axis=0)
    mac_device_groupy = mac_device.groupby([2, 22])
    db_mac = get_mac_table()

    np_list = []
    for ip, mac_addr in mac_device_groupy.groups:
        # Characters 0-1, 3-4 and 6-7 are the first three octets; the
        # separator characters at positions 2 and 5 are skipped.
        prefix = '-'.join(
            (mac_addr[0:2], mac_addr[3:5], mac_addr[6:8])).upper()
        # Direct membership test: works on Python 2 and 3 and avoids the
        # linear scan that `in db_mac.iterkeys()` performed.
        if prefix in db_mac:
            np_list.append([ip, mac_addr, db_mac[prefix]])

    return pd.DataFrame(np_list, columns=['user', 'mac', 'pingpai'])
def get_logdata_df():
    """Collect (ts, orig_ip, site) records for known sites from http.log.

    Rows whose column 2 matches ``db_config.get_pattern()`` are kept; the
    distinct (column 0, column 2, column 8) triples are then reduced to a
    site label taken from the host name and looked up in the mapping returned
    by ``get_data_base_host()``.

    Returns:
        A list of ``[ts, orig_ip, db_host[site]]`` lists (despite the
        function name, not a DataFrame). When the log file does not exist an
        empty ``pd.DataFrame`` is returned instead.
    """
    pattern = db_config.get_pattern()
    file_path_all = '/usr/local/data/http.log'
    if not os.path.exists(file_path_all):
        return pd.DataFrame()

    df = read_file.pandas_normal(file_path_all)
    # na=False: a NaN in the mask would make the boolean indexing fail.
    df = df[df.iloc[:, 2].str.match(pattern, na=False)]
    # NOTE(review): [:-1] drops the last *matching* row; presumably meant to
    # skip a trailing footer line -- confirm against read_file.pandas_normal.
    df = df.iloc[:-1, [0, 2, 8]].dropna(how='any')  # ts, origIP, host
    grouped = df.groupby([0, 2, 8])
    db_host = get_data_base_host()

    generic_labels = ('com', 'cn', 'net', 'gov', 'org')
    np_list = []
    for ts, orig_ip, host in grouped.groups:
        labels = host.split('.')
        if len(labels) < 2:
            continue
        # For hosts like "a.com.cn" the label before the generic one is used.
        # The length check prevents an IndexError on two-label hosts such as
        # "com.cn".
        if labels[-2] in generic_labels and len(labels) >= 3:
            site = labels[-3]
        else:
            site = labels[-2]
        if site not in db_host:
            continue
        # NOTE(review): the original appended every record twice; kept as-is
        # because callers may depend on it -- confirm whether it is intended.
        np_list.append([ts, orig_ip, db_host[site]])
        np_list.append([ts, orig_ip, db_host[site]])
    return np_list
def read_file_now(filename, poll_interval=0.5, timeout=None):
    """Wait until *filename* exists, then read it with read_file.pandas_normal.

    Args:
        filename: Full path of the file to read.
        poll_interval: Seconds to sleep between existence checks
            (default 0.5, the previously hard-coded value).
        timeout: Maximum number of seconds to wait. ``None`` (the default)
            waits forever, which is the original behaviour.

    Returns:
        Whatever ``read_file.pandas_normal(filename)`` returns.

    Raises:
        IOError: if *timeout* is given and the file has not appeared in time.
    """
    deadline = None if timeout is None else time.time() + timeout
    while not os.path.exists(filename):
        if deadline is not None and time.time() >= deadline:
            raise IOError('timed out waiting for %s' % filename)
        time.sleep(poll_interval)
    return read_file.pandas_normal(filename)
# Ordered browser rules: (label, any_of, all_of). A rule matches when at least
# one `any_of` marker and every `all_of` marker occurs in the user agent.
# Order matters: the first matching rule wins.
_BROWSER_RULES = (
    ('liebao', ('LBBrowser', 'LBBROWSER '), ()),
    ('QQBrowser', ('QQBrowser',), ()),
    ('Avant', ('Avant Browser',), ()),
    ('UCbrowser', ('UCBrowser', 'UCWEB'), ()),
    ('Maxthon', ('Maxthon',), ()),
    ('Tencent TT', ('TencentTraveler 4.0',), ()),
    ('sogou browser', ('sogoumobilebrowser',), ()),
    ('360Browser', ('360SE', '360browser'), ()),
    ('Firefox', ('Firefox', 'firefox'), ()),
    ('Chrome', ('Chrome',), ('Safari',)),
    ('Safari', ('MobileSafari',), ()),
    # The original `a or b and c` only required 'Safari' for the capitalised
    # spelling; both spellings now require it.
    ('Safari', ('macintosh', 'Macintosh'), ('Safari',)),
    ('Opera', ('Opera',), ()),
    # 'msie' replaces the original typo 'mise'.
    ('Internet Explorer', ('MSIE', 'msie'), ()),
    ('2345Browser', ('Mb2345Browser',), ()),
    ('Silk', ('Silk',), ()),
    ('BaiDuBrowser', ('baidubrowser',), ()),
    ('YaBrowser', ('YaBrowser',), ()),
)


def _classify_browser(usagent):
    """Return the browser label for a user-agent string, or None."""
    if len(usagent) <= 28:
        return None
    for label, any_of, all_of in _BROWSER_RULES:
        if (any(marker in usagent for marker in any_of)
                and all(marker in usagent for marker in all_of)):
            return label
    return None


def get_usagent_content():
    """Return the browser recognised for each distinct http.log request.

    The distinct (column 0, column 2, column 12) triples of the log are
    classified by substring markers in column 12, the user agent. User agents
    of 28 characters or fewer, and those matching no rule, are skipped.

    Returns:
        A list of ``[col0, col2, browser_label]`` lists. When the log file
        does not exist an empty ``pd.DataFrame`` is returned instead.
    """
    file_path_all = '/usr/local/data/http.log'
    if not os.path.exists(file_path_all):
        return pd.DataFrame()

    df = read_file.pandas_normal(file_path_all)
    usagent_content = df.iloc[:, [0, 2, 12]].dropna(axis=0)
    result = usagent_content.groupby([0, 2, 12])  # one key per distinct row

    browser = []
    for time_da, ip, usagent in result.groups:
        label = _classify_browser(usagent)
        if label is not None:
            browser.append([time_da, ip, label])
    return browser
def yes_no():
    """Return columns 1 and 8 of http.log without incomplete rows.

    The file ``http.log`` is read from the current working directory via
    ``read_file.pandas_normal``. Column 8 is renamed to ``host``; the last
    row of the file is excluded and rows with any missing value are dropped.

    Returns:
        The resulting two-column ``pd.DataFrame``.
    """
    frame = read_file.pandas_normal('http.log')
    frame.rename(columns={8: 'host'}, inplace=True)
    # Positional selection: every row but the last, columns 1 and 8.
    selected = frame.iloc[:-1, [1, 8]]
    return selected.dropna(how='any')
def get_yc_sub():
    """Return the distinct non-null values of column 2 in weird.log.

    Returns:
        A list of single-element lists, one per distinct value.
        When the log file does not exist an empty ``pd.DataFrame`` is
        returned instead.
    """
    weird_log = '/usr/local/data/weird.log'
    if not os.path.exists(weird_log):
        return pd.DataFrame()

    frame = read_file.pandas_normal(weird_log)
    values = frame.iloc[:, [2]].dropna(axis=0)
    # Group keys are the distinct values of the column.
    return [[key] for key in values.groupby([2]).groups]
def get_yc_content(file_path_all='C:\\Users\\qq\\Desktop\\weird.log'):
    """Return the distinct (column 0, column 2, column 4) rows of a weird.log.

    Args:
        file_path_all: Path of the log to read. Defaults to the location that
            used to be hard-coded (a developer desktop path), so existing
            callers are unaffected; pass the real path on other machines.

    Returns:
        A list of ``[col0, col2, col4]`` lists (named time, ori_ip and res_ip
        in the original code). When the file does not exist an empty
        ``pd.DataFrame`` is returned instead.
    """
    if not os.path.exists(file_path_all):
        return pd.DataFrame()

    df = read_file.pandas_normal(file_path_all)
    rows = df.iloc[:, [0, 2, 4]].dropna(axis=0)
    result = rows.groupby([0, 2, 4])  # one key per distinct row
    # `ts` rather than `time`, so the `time` module is not shadowed.
    return [[ts, ori_ip, res_ip] for ts, ori_ip, res_ip in result.groups]
def get_data():
    """Sum the byte counters of conn.log per service.

    Columns 7, 17 and 19 of every row but the last are kept and renamed to
    ``service``, ``orig_bytes`` and ``resp_bytes``; rows with any missing
    value are dropped before grouping.

    Returns:
        A ``pd.DataFrame`` indexed by ``service`` holding the per-service
        sums, or the integer ``0`` when the log file does not exist.
    """
    conn_log = file_path.current_log_file_path + 'conn.log'
    if not os.path.exists(conn_log):
        return 0

    frame = read_file.pandas_normal(conn_log)
    traffic = frame.iloc[:-1, [7, 17, 19]].dropna(how='any')
    traffic = traffic.rename(
        columns={7: 'service', 17: 'orig_bytes', 19: 'resp_bytes'})
    return traffic.groupby('service').sum()
def get_ip(file_path_all='C:\\Users\\qq\\Desktop\\conn.log'):
    """Return the distinct (column 0, column 2, column 4) rows of a conn.log.

    Args:
        file_path_all: Path of the log to read. Defaults to the location that
            used to be hard-coded (a developer desktop path), so existing
            callers are unaffected; pass the real path on other machines.

    Returns:
        A ``np.ndarray`` built from the list of ``[col0, col2, col4]`` rows.
        When the file does not exist an empty ``pd.DataFrame`` is returned
        instead.
    """
    if not os.path.exists(file_path_all):
        return pd.DataFrame()

    df = read_file.pandas_normal(file_path_all)
    rows = df.iloc[:, [0, 2, 4]].dropna(axis=0)
    result = rows.groupby([0, 2, 4])  # one key per distinct row
    return np.array([[a, b, c] for a, b, c in result.groups])
def get_yc_content(file_path_all='C:\\Users\\qq\\Desktop\\conn.log'):
    """Return the distinct rows of seven conn.log columns as an array.

    Columns 8, 9, 10, 16, 17, 18 and 19 are kept, rows with any missing value
    are dropped, and the remaining rows are de-duplicated by grouping on all
    selected columns.

    Args:
        file_path_all: Path of the log to read. Defaults to the location that
            used to be hard-coded (a developer desktop path), so existing
            callers are unaffected; pass the real path on other machines.

    Returns:
        A ``np.ndarray`` built from the list of 7-element rows. When the file
        does not exist an empty ``pd.DataFrame`` is returned instead.
    """
    if not os.path.exists(file_path_all):
        return pd.DataFrame()

    wanted = [8, 9, 10, 16, 17, 18, 19]
    df = read_file.pandas_normal(file_path_all)
    rows = df.iloc[:, wanted].dropna(axis=0)
    result = rows.groupby(wanted)  # one key per distinct row
    return np.array([list(key[:7]) for key in result.groups])
def get_data():
    """Return the ten respIp values contacted by the most distinct origIp.

    Reads ``test_file/conn.log``, renames columns 4, 2 and 6 to ``respIp``,
    ``origIp`` and ``protocol``, drops the last row and all ``icmp`` rows,
    keeps only rows whose origIp starts with ``10.185.``, de-duplicates the
    (respIp, origIp) pairs and counts the origIp values per respIp.

    The three intermediate frames are printed to stdout, as before.

    Returns:
        A ``pd.DataFrame`` indexed by ``respIp`` with an ``origIp`` count
        column, sorted descending and limited to ten rows.
    """
    df_conn = read_file.pandas_normal('test_file/conn.log')
    df_conn.rename(columns={4: 'respIp', 2: 'origIp', 6: 'protocol'},
                   inplace=True)

    # Positional selection: every row but the last; respIp, origIp, protocol.
    df_conn_useful = df_conn.iloc[:-1, [4, 2, 6]]
    df_conn_useful = df_conn_useful[
        df_conn_useful.protocol != 'icmp'].iloc[:, [0, 1]]
    # Raw string avoids the invalid "\." escape; na=False keeps a missing
    # origIp from putting NaN into the boolean mask.
    df_conn_useful = df_conn_useful[
        df_conn_useful.iloc[:, 1].str.match(r'10\.185\.', na=False)]
    # print(x) with one argument behaves the same on Python 2 and 3.
    print(df_conn_useful)

    # Rebinding instead of inplace=True avoids mutating a filtered slice.
    df_conn_useful = df_conn_useful.drop_duplicates(['respIp', 'origIp'])
    df_all = df_conn_useful.groupby(['respIp']).count()
    print(df_all)

    df_results = df_all.sort_values(by='origIp', ascending=False).head(10)
    print(df_results)
    return df_results
def get_yc_content():
    """Return column-2 values of weird.log rows whose column 6 starts with 'connection'.

    The distinct (column 2, column 6) pairs are examined; a pair contributes
    its column-2 value when the column-6 text begins with ``connection``.

    Returns:
        A list of single-element lists. When the log file does not exist an
        empty ``pd.DataFrame`` is returned instead.
    """
    weird_log = '/usr/local/data/weird.log'
    if not os.path.exists(weird_log):
        return pd.DataFrame()

    frame = read_file.pandas_normal(weird_log)
    pairs = frame.iloc[:, [2, 6]].dropna(axis=0).groupby([2, 6])
    return [[key[0]] for key in pairs.groups
            if key[1].startswith('connection')]
# User agents with these prefixes are treated as browsers/runtimes, not apps.
_APP_SKIP_PREFIXES = ('Mozilla', 'Dalvik', 'Safari', 'Opera')

# Ordered (prefixes, label) rules; the first rule whose prefix matches wins.
_APP_PREFIX_RULES = (
    (('Youku', 'youku-tudou'), 'Youku'),
    (('ZhuiShuShenQi',), 'ZhuiShuShenQi'),
    (('Zeus',), 'Zeus'),
    (('YYMobile',), 'YYMobile'),
    (('ynote',), 'ynote'),  # Youdao Note
    (('youdao_dict',), 'youdao_dict'),  # Youdao Dictionary
    (('xiami',), 'xiami'),  # Xiami
    (('Xunlei',), 'xunlei'),
    (('Xfplay',), 'Xfplay'),
    (('wpsoffice',), 'wpsoffice'),
    (('wifikey', 'wfbl'), 'wifikey'),
    (('WeRead',), 'WeRead'),
    (('Weibo', 'SinaNews'), 'Weibo'),
    (('WeChat', 'MicroMessenger'), 'WeChat'),
    (('Valve',), 'Valve'),
    (('UC/',), 'UC'),
    (('Tga', 'TGA'), 'Tga'),
    (('ttplayer',), 'ttplayer'),
    (('ting',), 'ting'),  # Baidu Music
    (('TBClient', 'Taobao'), 'Taobao'),
    (('SOHUVideoHD', 'sohuiPadVideo'), 'SOHUVideo'),
    (('ShanbayWords',), 'ShanbayWords'),
    (('sogou_ime',), 'sogou'),
    (('smoba',), 'wzry'),
    (('SohuNews',), 'SohuNews'),
    (('Qzone', 'qzone', 'android-qzone'), 'Qzone'),
    (('QYPlayer', 'QIYIVideo', 'iQiYiPhoneVideo'), 'QYPlayer'),  # iQiyi
    (('qqlive',), 'qqlive'),
    (('QQGame',), 'QQGame'),
    (('QQMusic', 'ANDROIDQQMUSIC'), 'QQMusic'),
    (('qqpy', 'qqppim'), 'qqpy'),
    (('PandaReader',), 'PandaReader'),
    (('PPStream',), 'PPStream'),
    (('PandaTV', 'pandatv', 'pandaTV'), 'PandaTV'),
    (('News',), 'News'),
    (('NeteaseMusic',), 'NeteaseMusic'),
    (('netdisk',), 'netdisk'),
    (('MQQBrowser', 'QQBrowser'), 'MQQBrowser'),
    (('MiuiMusic',), 'MiuiMusic'),
    (('Miaopai',), 'Miaopai'),
    (('MobileMap',), 'MobileMap'),
    (('Mogujie',), 'Mogujie'),
    (('MONO',), 'MONO'),
    (('MomoChat',), 'Momo'),
    (('MGTV',), 'MGTV'),
    (('live',), 'live'),
    (('LOFTER',), 'LOFTER'),
    (('Letv',), 'Letv'),
    (('kugou/',), 'kugou'),
    (('kwai',), 'kwai'),
    (('hearthstone',), 'hearthstone'),
    (('Jdipad', 'jdapp', 'JD'), 'JD'),
    (('Jike',), 'Jike'),
    (('iFLYCloud',), 'iFLYCloud'),
    (('IPadQQ', 'QQ/', 'QQClient', 'QQ D'), 'QQ'),
    (('HiSpace', 'hispace'), 'HiSpace'),
    (('HunanTV', 'MGTV'), 'MGTV'),
    (('Flipboard',), 'Flipboard'),
    (('Fanli',), 'Fanli'),
    (('Funshion',), 'Funshion'),
    (('fenghuangdiantai',), 'fenghuangdiantai'),
    (('esbook',), 'esbook'),
    (('DYZB',), 'DYZB'),
    (('ComicReader',), 'ComicReader'),
    (('Changba',), 'Changba'),
    (('Cupid',), 'Cupid'),
    (('BaiduHD',), 'BaiduHD'),
    (('bdtb',), 'bdtb'),
    (('bukaios',), 'bukaios'),
    (('Blued',), 'Blued'),
    (('BtcTrade',), 'BtcTrade'),
    (('Android QQMail ',), 'QQMail '),
    (('AMAP',), 'gd MAP'),
    (('AiMeiTuan',), 'AiMeiTuan'),
    (('alipay', 'Alipay'), 'alipay'),
    (('AppStore',), 'AppStore'),
    (('360freewifi',), '360freewifi'),
)

# Fallback (min_length, marker, label) rules, tried only when no prefix rule
# matched: the string must be longer than min_length and contain the marker
# somewhere after its first character.
_APP_MARKER_RULES = (
    (50, 'qqnews', 'QQ'),
    (25, '_weibo_', 'Weibo'),
    (58, 'ANDROID_TB', 'Taobao'),
)


def _classify_app(app_name):
    """Return the application label for a user-agent string, or None."""
    if len(app_name) <= 5 or app_name.startswith(_APP_SKIP_PREFIXES):
        return None
    # Desktop netdisk clients are skipped. The original tested the truthiness
    # of str.find(), which is -1 (truthy) when the marker is absent, so every
    # netdisk user agent was dropped.
    if app_name.startswith('netdisk') and 'PC-Windows' in app_name:
        return None
    for prefixes, label in _APP_PREFIX_RULES:
        if app_name.startswith(prefixes):
            return label
    for min_length, marker, label in _APP_MARKER_RULES:
        if len(app_name) > min_length and app_name.find(marker) > 0:
            return label
    return None


def get_usagent_content():
    """Return the application recognised for each distinct http.log request.

    The distinct (column 0, column 2, column 12) triples of the log are
    classified by the prefix of column 12, the user agent. User agents of
    five characters or fewer, browser-like user agents, and those matching no
    rule are skipped.

    Returns:
        A list of ``[col0, col2, app_label]`` lists. When the log file does
        not exist an empty ``pd.DataFrame`` is returned instead.
    """
    file_path_all = '/usr/local/data/http.log'
    if not os.path.exists(file_path_all):
        return pd.DataFrame()

    df = read_file.pandas_normal(file_path_all)
    usagent_content = df.iloc[:, [0, 2, 12]].dropna(axis=0)
    result = usagent_content.groupby([0, 2, 12])  # one key per distinct row

    app_use = []
    # `ts` rather than `time`, so the `time` module is not shadowed.
    for ts, ip, app_name in result.groups:
        label = _classify_app(app_name)
        if label is not None:
            app_use.append([ts, ip, label])
    return app_use
# Only user agents starting with one of these prefixes are inspected.
_DEVICE_UA_PREFIXES = ('Mozilla', 'Safari', 'Opera', 'Dalvik', 'User-Agent')


def _ios_device_label(platform, model, window):
    """Build '<model> <version>' from the underscore-separated OS version.

    `window` bounds how far after the first underscore a second one is
    searched for (3 for iPhone, 5 for iPad, as in the original code).
    Returns None when neither slicing rule applies.
    """
    first = platform.find('_')
    second = platform.find('_', first + 1, first + window)
    if second > 0 and first > 0:
        return model + ' ' + platform[first - 2:second + 2]
    if second < 0:
        return model + ' ' + platform[first - 2:first + 2]
    return None


def _android_model(platform):
    """Extract the device model from an Android platform string, or None."""
    if len(platform) <= 28:
        return None
    fields = platform.split(';')
    # Use the last field unless it is too short to hold "<model> Build/...".
    field = fields[-1] if len(fields[-1]) > 9 else fields[-2]
    # Drop the trailing token; the first token is skipped by the join below.
    tokens = field.split(' ')[:-1]
    if 2 <= len(tokens) <= 4:
        return ' '.join(tokens[1:])
    return None


def _classify_platform(platform):
    """Map the parenthesised platform part of a user agent to (device, system).

    Returns None when the platform is not recognised.
    """
    if platform.startswith('X11'):
        # The original architecture test relied on str.find() truthiness and
        # was always true, so every X11 platform is reported as Linux.
        return ('Linux', 'Linux')

    if platform.startswith(('Windows', 'compatible')):
        # Checked first: these prefixes previously made the dedicated
        # Windows Phone branches unreachable.
        if 'Windows Phone' in platform:
            return ('Windows Phone', 'Windows')
        nt_at = platform.find('Windows NT')
        if nt_at >= 0:
            # A match at index 0 used to fall through to 'Windows 98'.
            # When no ';' follows, the version runs to the end of the string.
            end = platform.find(';', nt_at)
            version = platform[nt_at:end] if end > 0 else platform[nt_at:]
            return (version, 'Windows')
        if 'Windows 98' in platform:
            return ('Windows 98', 'Windows')
        return None

    if platform.startswith(('x86_64', 'Macintosh')):
        return ('Mac', 'IOS')

    if platform.startswith('iPhone'):
        label = _ios_device_label(platform, 'iPhone', 3)
        return None if label is None else (label, 'IOS')

    if platform.startswith('iPad'):
        label = _ios_device_label(platform, 'iPad', 5)
        return None if label is None else (label, 'IOS')

    if platform.startswith('Linux') and platform.find('Android') > 0:
        model = _android_model(platform)
        return None if model is None else (model, 'Android')

    return None


def get_usagent_content():
    """Return the device recognised for each distinct http.log request.

    The distinct (column 0, column 2, column 12) triples of the log are
    examined. For user agents (column 12) longer than 28 characters that
    start with a known prefix and contain a parenthesised section, the text
    between the first '(' and the following ')' is classified.

    Returns:
        A list of ``[time, col2, device, system]`` lists, where ``time`` is
        column 0 rendered with ``"%Y--%m--%d %H:%M:%S"`` in local time.
        When the log file does not exist an empty ``pd.DataFrame`` is
        returned instead.
    """
    file_path_all = '/usr/local/data/http.log'
    if not os.path.exists(file_path_all):
        return pd.DataFrame()

    df = read_file.pandas_normal(file_path_all)
    usagent_content = df.iloc[:, [0, 2, 12]].dropna(axis=0)
    result = usagent_content.groupby([0, 2, 12])  # one key per distinct row

    device_type_result = []
    for usa in result.groups:
        time_da = time.strftime("%Y--%m--%d %H:%M:%S",
                                time.localtime(int(float(usa[0]))))
        ip = usa[1]
        usagent = usa[2]
        if len(usagent) <= 28 or not usagent.startswith(_DEVICE_UA_PREFIXES):
            continue  # not a common user agent
        if usagent.find('(') <= 0:
            continue  # no parenthesised platform section
        platform = usagent.split('(')[1].split(')')[0]
        device = _classify_platform(platform)
        if device is not None:
            device_type_result.append([time_da, ip, device[0], device[1]])
    return device_type_result