def test_sum(self):
    """Print every entry of the final list returned by ``utils.get_sum``.

    The input for ``utils.get_sum`` is whatever
    ``utils.get_time_range_dtl`` returns for the fixed date string
    '20191127' and flag '2'.
    """
    details = utils.get_time_range_dtl('20191127', '2')
    for entry in utils.get_sum(details):
        print(entry)
# -*- coding: utf-8 -*-
# @Time : 2020/4/24 19:02
# @Author : Mqz
# @FileName: main.py
# main.py
#
# Demo script: calls get_sum from utils and round-trips a string through
# the Encoder / Decoder classes from class_utils, printing each result.
from utils import get_sum
# Import the two classes by name instead of `import *`, so nothing in
# class_utils can silently shadow get_sum (or any other name) here.
from class_utils import Decoder, Encoder

print(get_sum(1, 2))

encoder = Encoder()
decoder = Decoder()

print(encoder.encode('abcde'))
print(decoder.decode('edcba'))

# ########## Output ##########
#
# 3
# edcba
# abcde
#! /usr/bin/python3
# coding=utf-8
from utils import get_sum

# Print the value get_sum returns for the arguments 1 and 2.
result = get_sum(1, 2)
print('get_sum: ', result)
def process(self):
    """Clean ``self.df_all`` and add the engineered feature columns.

    Steps, in order:

    1. Fill missing values: ``-1`` for every column in
       ``self.nanum_columns`` and ``""`` for every column in
       ``self.nastr_columns``.
    2. Add derived columns computed row by row with helpers from
       ``utils`` (``get_ip_scope``, ``get_ip_zone``,
       ``concatenate_values``, ``get_duration``, ``get_end_time``,
       ``get_sum``, ``get_ratio``). The concatenation columns are also
       appended to ``self.le_columns``.
    3. Label-encode every column listed in ``self.le_columns``.
    4. One-hot encode every column listed in ``self.oe_columns``: the
       source column is dropped and replaced by its dummy columns, named
       ``"<column>_<value>"`` with the value lower-cased and whitespace
       runs replaced by ``_``.

    The result is stored in ``self.df_all``; nothing is returned.
    """
    # NOTE: the lambdas below read loop variables. That is safe here
    # because DataFrame.apply runs each lambda to completion before the
    # loop moves on to the next iteration.

    # fill na
    # Assign the filled column back rather than calling
    # fillna(inplace=True) on the selected column: with pandas
    # Copy-on-Write that chained call only changes a temporary object and
    # leaves self.df_all unfilled.
    for column in self.nanum_columns:
        print("Fill NA {}".format(column))
        self.df_all[column] = self.df_all[column].fillna(-1)
    for column in self.nastr_columns:
        print("Fill NA {}".format(column))
        self.df_all[column] = self.df_all[column].fillna("")

    # new features: IP scope of the dominating destination / source category
    for target, source in [
            ("dstipscope_dominate", "dstipcategory_dominate"),
            ("srcipscope_dominate", "srcipcategory_dominate")]:
        self.df_all[target] = self.df_all.apply(
            lambda row: utils.get_ip_scope(row[source]), axis=1)

    # ip zone features: ip_zone_1 .. ip_zone_4
    for zone in range(1, 5):
        self.df_all["ip_zone_{}".format(zone)] = self.df_all.apply(
            lambda row: utils.get_ip_zone(row["ip"], zone), axis=1)

    # concatenation features built from the ip zone columns
    zone_groups = [
        ("ip_zone_12", ["ip_zone_1", "ip_zone_2"]),
        ("ip_zone_123", ["ip_zone_1", "ip_zone_2", "ip_zone_3"]),
        ("ip_zone_34", ["ip_zone_3", "ip_zone_4"]),
        ("ip_zone_234", ["ip_zone_2", "ip_zone_3", "ip_zone_4"]),
    ]
    for name, parts in zone_groups:
        self.df_all[name] = self.df_all.apply(
            lambda row: utils.concatenate_values([row[p] for p in parts]),
            axis=1)
    # Registered only after all four columns exist, in this order.
    self.le_columns.extend(name for name, _ in zone_groups)

    # concatenation features built from pairs of existing columns
    feature_pairs = [("categoryname", "ipcategory_scope"),
                     ("categoryname", "overallseverity"),
                     ("srcipscope_dominate", "dstipscope_dominate")]
    for first, second in feature_pairs:
        name = first + "_" + second
        self.df_all[name] = self.df_all.apply(
            lambda row: utils.concatenate_values([row[first], row[second]]),
            axis=1)
        self.le_columns.append(name)

    # timestamp_dist in hour and minute
    self.df_all["timestamp_hour"] = self.df_all.apply(
        lambda row: utils.get_duration(row["timestamp_dist"]), axis=1)

    # ending time features: end_hour, end_minute, end_second
    for unit in ("hour", "minute", "second"):
        self.df_all["end_{}".format(unit)] = self.df_all.apply(
            lambda row: utils.get_end_time(
                row["start_hour"], row["start_minute"], row["start_second"],
                row["timestamp_dist"], unit),
            axis=1)

    # sum score features: each target is utils.get_sum over its columns
    sum_features = [
        ("sum_score", ["{}score".format(score) for score in
                       ["untrust", "flow", "trust", "enforcement"]]),
        ("sum_n", ["n{}".format(i) for i in range(1, 11)]),
        ("sum_p5", ["p5{}".format(p5) for p5 in ["m", "w", "d"]]),
        ("sum_p8", ["p8{}".format(p8) for p8 in ["m", "w", "d"]]),
    ]
    for name, columns in sum_features:
        self.df_all[name] = self.df_all.apply(
            lambda row: utils.get_sum([row[c] for c in columns]), axis=1)
    # NOTE: a combined feature is currently disabled:
    # self.df_all["sum_p58"] = self.df_all.apply(lambda row: utils.get_sum([row["sum_p5"], row["sum_p8"]]), axis = 1)

    # get ratio features
    # NOTE: the month/week ratio is currently disabled:
    # self.df_all["thrcnt_month_week"] = self.df_all.apply(lambda row: utils.get_ratio(row["thrcnt_month"], row["thrcnt_week"]), axis = 1)
    for name, left, right in [
            ("thrcnt_month_day", "thrcnt_month", "thrcnt_day"),
            ("thrcnt_week_day", "thrcnt_week", "thrcnt_day")]:
        self.df_all[name] = self.df_all.apply(
            lambda row: utils.get_ratio(row[left], row[right]), axis=1)

    # encode features with label encoder (refit for every column)
    label_encoder = LabelEncoder()
    for column in self.le_columns:
        print("Label encoding {}".format(column))
        label_encoder.fit(self.df_all[column])
        self.df_all[column] = label_encoder.transform(self.df_all[column])

    # encode features with one-hot encoder
    for column in self.oe_columns:
        print("One-hot encoding {}".format(column))
        pd_encoded = pd.get_dummies(self.df_all[column])
        pd_encoded.columns = [
            "{}_{}".format(column, "_".join(str(col).lower().split()))
            for col in pd_encoded.columns
        ]
        self.df_all.drop(column, axis=1, inplace=True)
        self.df_all = pd.concat([self.df_all, pd_encoded], axis=1)