def calc_feature(self, org_train, org_test):
    """Flag correct/incorrect Assessment rows and build grouped features.

    When ``self.datatype == "train"`` the rows of ``org_train`` whose
    ``installation_id`` appears in ``self.train_labels`` are used;
    otherwise ``org_test`` is used directly (and therefore receives the
    ``num_correct`` / ``num_incorrect`` columns in place).

    Args:
        org_train: DataFrame with at least ``installation_id``, ``type``
            and ``event_data`` columns.
        org_test: DataFrame with the same columns.

    Returns:
        The object returned by ``applyParallel`` for the
        ``installation_id`` groups and ``self._calc_features``.
    """
    if self.datatype == "train":
        labelled_ids = self.train_labels.installation_id.unique()
        df = org_train.loc[org_train.installation_id.isin(labelled_ids)]
    else:
        df = org_test

    # Collect num_correct / num_incorrect up to just before the target
    # (translated from the original comment).
    # The original OR-ed each condition with an identical copy of itself;
    # a single clause selects exactly the same rows and evaluates
    # ``str.contains`` once per pattern instead of twice.
    is_assessment = df.type == "Assessment"
    c_ass_idx = is_assessment & df["event_data"].str.contains("true")
    inc_ass_idx = is_assessment & df["event_data"].str.contains("false")

    df.loc[c_ass_idx, 'num_correct'] = 1
    df.loc[inc_ass_idx, 'num_incorrect'] = 1

    ret = applyParallel(df.groupby("installation_id"), self._calc_features)
    self.format_and_save_feats(ret)
    return ret
def calc_feature(self, org_train, org_test):
    """Build grouped session features and keep only the usable columns.

    When ``self.datatype == "train"`` the rows of ``org_train`` whose
    ``installation_id`` appears in ``self.train_labels`` are used;
    otherwise ``org_test`` is used unchanged.

    Args:
        org_train: DataFrame with at least an ``installation_id`` column.
        org_test: DataFrame with at least an ``installation_id`` column.

    Returns:
        The ``applyParallel`` result restricted to the columns that are
        not in the exclusion list below.
    """
    if self.datatype == "train":
        labelled_ids = self.train_labels.installation_id.unique()
        df = org_train.loc[org_train.installation_id.isin(labelled_ids)]
    else:
        df = org_test

    ret = applyParallel(df.groupby("installation_id"), self.ins_id_sessions)

    # The unused ``ret_col`` list and a commented-out save call were
    # removed; only ``use_cols`` ever influenced the result.
    excluded = (
        "accuracy", "accuracy_group", "cum_accuracy", "title", "type",
        "event_code", "gs_max_time",
    )
    use_cols = [c for c in ret.columns if c not in excluded]

    # Two separate selections are kept on purpose so the saved frame and
    # the returned frame stay distinct objects, as in the original.
    self.format_and_save_feats(ret[use_cols])
    return ret[use_cols]
def calc_feature(self, org_train, org_test):
    """Prepare the event frame, fit encoders and build session features.

    Train mode keeps the rows of ``org_train`` belonging to installation
    ids that have at least one ``type == "Assessment"`` row. Test mode
    writes ``num_correct`` / ``num_incorrect`` flags into ``org_test``
    in place and then uses it as-is.

    Args:
        org_train: DataFrame with ``installation_id`` and ``type`` columns.
        org_test: DataFrame with ``installation_id``, ``event_code`` and
            ``event_data`` columns.

    Returns:
        The ``applyParallel`` result without the columns listed in
        ``dropped`` below.
    """
    if self.datatype == "train":
        assessed_ids = org_train.loc[
            org_train.type == "Assessment"
        ].installation_id.unique()
        df = org_train.loc[org_train.installation_id.isin(assessed_ids)]
    else:
        # Collect num_correct / num_incorrect up to just before the target.
        # Codes 4100/4110 are presumably the answer-submission events --
        # TODO confirm against the event specification.
        code_match = org_test.event_code.isin([4100, 4110])
        org_test.loc[
            code_match & (org_test["event_data"].str.contains("true")),
            'num_correct',
        ] = 1
        org_test.loc[
            code_match & (org_test["event_data"].str.contains("false")),
            'num_incorrect',
        ] = 1
        df = org_test

    # Gather encoding information from both frames.
    self.get_encoder(org_train, org_test)

    ret = applyParallel(df.groupby("installation_id"), self.ins_id_sessions)

    dropped = (
        "accuracy", "accuracy_group", "cum_accuracy", "title", "type",
        "event_code", "gs_max_time",
    )
    use_cols = [name for name in ret.columns if name not in dropped]

    self.format_and_save_feats(ret[use_cols])
    return ret[use_cols]
def calc_feature(self, org_train, org_test):
    """Build per-installation session counts as int32 features.

    Train mode keeps the rows of ``org_train`` belonging to installation
    ids that have at least one ``type == "Assessment"`` row; test mode
    uses ``org_test`` unchanged.

    Args:
        org_train: DataFrame with ``installation_id`` and ``type`` columns.
        org_test: DataFrame with an ``installation_id`` column.

    Returns:
        The ``applyParallel`` result (via ``self.count_sessions``) without
        the ``title``, ``type``, ``event_code`` and ``gs_max_time`` columns.
    """
    if self.datatype != "train":
        df = org_test
    else:
        assessed_ids = org_train.loc[
            org_train.type == "Assessment"
        ].installation_id.unique()
        df = org_train.loc[org_train.installation_id.isin(assessed_ids)]

    ret = applyParallel(df.groupby("installation_id"), self.count_sessions)

    # Every column except these identifiers is filled with 0 and cast.
    identifier_cols = ("game_session", "installation_id", "title", "type")
    count_cols = [name for name in ret.columns if name not in identifier_cols]
    ret[count_cols] = ret[count_cols].fillna(0).astype("int32")

    dropped = ("title", "type", "event_code", "gs_max_time")
    use_cols = [name for name in ret.columns if name not in dropped]

    self.format_and_save_feats(ret[use_cols])
    return ret[use_cols]
def calc_feature(self, df):
    """Flag and keep the scored Assessment rows, then build features.

    A row is "scored" when ``type == "Assessment"`` and either
    ``event_code == 4100`` with a title other than
    "Bird Measurer (Assessment)", or ``event_code == 4110`` with exactly
    that title. Scored rows whose ``event_data`` contains "true" /
    "false" get ``num_correct`` / ``num_incorrect`` set to 1 on the
    passed-in frame (in place); only scored rows are then grouped.

    Args:
        df: DataFrame with ``installation_id``, ``event_code``, ``title``,
            ``type`` and ``event_data`` columns.

    Returns:
        The object returned by ``applyParallel`` for the
        ``installation_id`` groups and ``self._calc_features``.
    """
    is_bird = df.title == "Bird Measurer (Assessment)"
    scored = (df.type == "Assessment") & (
        ((df.event_code == 4100) & ~is_bird)
        | ((df.event_code == 4110) & is_bird)
    )

    target_c_ass_idx = scored & (df["event_data"].str.contains("true"))
    target_inc_ass_idx = scored & (df["event_data"].str.contains("false"))

    df.loc[target_c_ass_idx, 'num_correct'] = 1
    df.loc[target_inc_ass_idx, 'num_incorrect'] = 1

    # Rebinding ``df`` here leaves the caller's frame with the new columns
    # but does not shrink it.
    df = df[scored]
    print(df.shape)

    ret = applyParallel(df.groupby("installation_id"), self._calc_features)
    self.format_and_save_feats(ret)
    return ret
def calc_feature(self, org_train, org_test):
    """Flag scored Assessment rows, build session features, attach labels.

    When ``self.datatype == "train"`` the rows of ``org_train`` whose
    ``installation_id`` appears in ``self.train_labels`` are used and the
    result is inner-joined with ``self.train_labels``; otherwise
    ``org_test`` is used directly (and receives the flag columns in place).

    Args:
        org_train: DataFrame with ``installation_id``, ``event_code``,
            ``title``, ``type`` and ``event_data`` columns.
        org_test: DataFrame with the same columns.

    Returns:
        The feature frame after dropping the two ``-99`` columns, filling
        missing values with 0 (except the columns in ``skip_fill``) and,
        in train mode, merging the labels.

    Raises:
        KeyError: If ``accum_acc_gr_-99`` or ``prev_acc_gr_-99`` is absent
            from the ``applyParallel`` result.
    """
    if self.datatype == "train":
        labelled_ids = self.train_labels.installation_id.unique()
        df = org_train.loc[org_train.installation_id.isin(labelled_ids)]
    else:
        df = org_test

    # Bug fix: the original wrote ``A | B & T``. Because ``&`` binds tighter
    # than ``|`` this meant ``A | (B & T)``, so the Assessment-type filter
    # only guarded the Bird Measurer clause. The type test now applies to
    # both clauses.
    is_bird = df.title == "Bird Measurer (Assessment)"
    scored = (
        ((df.event_code == 4100) & ~is_bird)
        | ((df.event_code == 4110) & is_bird)
    ) & (df["type"] == "Assessment")

    c_ass_idx = scored & (df["event_data"].str.contains("true"))
    inc_ass_idx = scored & (df["event_data"].str.contains("false"))

    df.loc[c_ass_idx, 'num_correct'] = 1
    df.loc[inc_ass_idx, 'num_incorrect'] = 1

    ret = applyParallel(df.groupby("installation_id"), self.ins_id_sessions)

    # Presumably -99 is a "no previous result" sentinel group -- TODO confirm.
    del ret["accum_acc_gr_-99"], ret["prev_acc_gr_-99"]

    # These columns keep their missing values; everything else is 0-filled.
    skip_fill = ("cum_accuracy", "prev_num_corrects", "prev_num_incorrects")
    fill_cols = [c for c in ret.columns if c not in skip_fill]
    ret[fill_cols] = ret[fill_cols].fillna(0)

    if self.datatype == "train":
        ret = pd.merge(
            ret,
            self.train_labels,
            how="inner",
            on=["installation_id", "game_session"],
        )

    self.format_and_save_feats(ret)
    return ret
def calc_feature(self, org_train, org_test):
    """Build grouped features from the train or test frame.

    Train mode keeps the rows of ``org_train`` whose ``installation_id``
    appears in ``self.train_labels``; any other mode uses ``org_test``
    unchanged.

    Args:
        org_train: DataFrame with an ``installation_id`` column.
        org_test: DataFrame with an ``installation_id`` column.

    Returns:
        The object returned by ``applyParallel`` for the
        ``installation_id`` groups and ``self._calc_features``.
    """
    if self.datatype != "train":
        source = org_test
    else:
        labelled_ids = self.train_labels.installation_id.unique()
        source = org_train.loc[org_train.installation_id.isin(labelled_ids)]

    grouped = source.groupby("installation_id")
    ret = applyParallel(grouped, self._calc_features)

    self.format_and_save_feats(ret)
    return ret
def calc_feature(self, org_train, org_test):
    """Flag scored Assessment rows and build int32 session features.

    The selected frame (``org_train`` in train mode, else ``org_test``) is
    used without filtering and receives ``num_correct`` /
    ``num_incorrect`` columns in place. The full feature frame is handed
    to ``self.format_and_save_feats``; the returned frame omits every
    column whose name contains "Assessment".

    Args:
        org_train: DataFrame with ``installation_id``, ``event_code``,
            ``title``, ``type`` and ``event_data`` columns.
        org_test: DataFrame with the same columns.

    Returns:
        The ``applyParallel`` result without the "Assessment" columns.
    """
    # Collect num_correct / num_incorrect up to just before the target.
    df = org_train if self.datatype == "train" else org_test

    is_bird = df.title == "Bird Measurer (Assessment)"
    scored = (df.type == "Assessment") & (
        ((df.event_code == 4100) & ~is_bird)
        | ((df.event_code == 4110) & is_bird)
    )
    c_ass_idx = scored & (df["event_data"].str.contains("true"))
    inc_ass_idx = scored & (df["event_data"].str.contains("false"))

    df.loc[c_ass_idx, 'num_correct'] = 1
    df.loc[inc_ass_idx, 'num_incorrect'] = 1

    # NOTE: a serial per-group loop followed by pd.concat used to sit here
    # commented out, presumably as a debugging alternative.
    ret = applyParallel(df.groupby("installation_id"), self.ins_id_sessions)

    # Every column except these is filled with 0 and cast to int32.
    untouched = (
        "accuracy", "accuracy_group", "cum_accuracy", "game_session",
        "installation_id", "title", "type",
    )
    ret_col = [name for name in ret.columns if name not in untouched]
    ret[ret_col] = ret[ret_col].fillna(0).astype("int32")

    # Saving happens before the column filter, so the saved frame still
    # contains the "Assessment" columns.
    self.format_and_save_feats(ret)

    use_cols = [name for name in ret.columns if "Assessment" not in name]
    return ret[use_cols]
def calc_feature(self, org_train, org_test):
    """Flag scored rows and build int32 session features.

    Train mode keeps the rows of ``org_train`` whose ``installation_id``
    appears in ``self.train_labels``; otherwise ``org_test`` is used
    directly (and receives the flag columns in place). Columns whose name
    contains "Assessment" are removed before saving and returning.

    Args:
        org_train: DataFrame with ``installation_id``, ``event_code``,
            ``title`` and ``event_data`` columns.
        org_test: DataFrame with the same columns.

    Returns:
        The filtered feature frame, the same object that was passed to
        ``self.format_and_save_feats``.
    """
    if self.datatype != "train":
        # Collect num_correct / num_incorrect up to just before the target.
        df = org_test
    else:
        labelled_ids = self.train_labels.installation_id.unique()
        df = org_train.loc[org_train.installation_id.isin(labelled_ids)]

    # NOTE(review): this mask does not test ``df.type``; confirm that
    # matching rows of non-Assessment sessions is intended.
    is_bird = df.title == "Bird Measurer (Assessment)"
    scored = ((df.event_code == 4100) & ~is_bird) | (
        (df.event_code == 4110) & is_bird
    )
    c_ass_idx = scored & (df["event_data"].str.contains("true"))
    inc_ass_idx = scored & (df["event_data"].str.contains("false"))

    df.loc[c_ass_idx, 'num_correct'] = 1
    df.loc[inc_ass_idx, 'num_incorrect'] = 1

    ret = applyParallel(df.groupby("installation_id"), self.ins_id_sessions)

    # Every column except these is filled with 0 and cast to int32.
    untouched = (
        "accuracy", "accuracy_group", "cum_accuracy", "game_session",
        "installation_id", "title", "type",
    )
    ret_col = [name for name in ret.columns if name not in untouched]
    ret[ret_col] = ret[ret_col].fillna(0).astype("int32")

    kept = [name for name in ret.columns if "Assessment" not in name]
    ret = ret[kept]

    self.format_and_save_feats(ret)
    return ret