예제 #1
0
    def calc_feature(self, org_train, org_test):
        """Flag correct/incorrect Assessment rows and build per-installation features.

        When ``self.datatype == "train"`` only the rows of ``org_train`` whose
        ``installation_id`` occurs in ``self.train_labels`` are used.  Otherwise
        ``org_test`` is used as is, which means the column assignments below
        modify the caller's ``org_test`` frame in place.

        Rows with ``type == "Assessment"`` whose ``event_data`` contains
        ``"true"`` get ``num_correct = 1``; those whose ``event_data`` contains
        ``"false"`` get ``num_incorrect = 1``.  A row containing both
        substrings receives both flags.

        Args:
            org_train: Training event log; must provide ``installation_id``,
                ``type`` and ``event_data`` columns.
            org_test: Test event log with the same columns.

        Returns:
            Whatever ``applyParallel`` returns for the ``installation_id``
            groups processed by ``self._calc_features``; the same object is
            first handed to ``self.format_and_save_feats``.
        """
        if self.datatype == "train":
            # Keep only installations that appear in the training labels.
            labelled_ids = self.train_labels.installation_id.unique()
            df = org_train.loc[org_train.installation_id.isin(labelled_ids)]
        else:
            # Obtain num_correct/num_incorrect up to the immediately
            # preceding point (translated from the original note).
            df = org_test

        # The original OR-ed each mask with an identical copy of itself;
        # a single evaluation selects exactly the same rows.
        is_assessment = df.type == "Assessment"
        c_ass_idx = is_assessment & df["event_data"].str.contains("true")
        inc_ass_idx = is_assessment & df["event_data"].str.contains("false")

        df.loc[c_ass_idx, 'num_correct'] = 1
        df.loc[inc_ass_idx, 'num_incorrect'] = 1

        ret = applyParallel(df.groupby("installation_id"), self._calc_features)

        self.format_and_save_feats(ret)
        return ret
    def calc_feature(self, org_train, org_test):
        """Build per-installation session features and save the kept columns.

        When ``self.datatype == "train"`` only the rows of ``org_train`` whose
        ``installation_id`` occurs in ``self.train_labels`` are used;
        otherwise ``org_test`` is used unchanged.

        Args:
            org_train: Training event log with an ``installation_id`` column.
            org_test: Test event log with an ``installation_id`` column.

        Returns:
            The result of ``applyParallel`` (over ``installation_id`` groups,
            using ``self.ins_id_sessions``) restricted to the columns that are
            not in the exclusion list below.
        """
        if self.datatype == "train":
            # Keep only installations that appear in the training labels.
            labelled_ids = self.train_labels.installation_id.unique()
            df = org_train.loc[org_train.installation_id.isin(labelled_ids)]
        else:
            # Obtain num_correct/num_incorrect up to the immediately
            # preceding point (translated from the original note).
            df = org_test

        ret = applyParallel(df.groupby("installation_id"),
                            self.ins_id_sessions)

        # Columns that are neither saved nor returned.
        excluded = (
            "accuracy", "accuracy_group", "cum_accuracy", "title", "type",
            "event_code", "gs_max_time",
        )
        use_cols = [c for c in ret.columns if c not in excluded]

        # Select twice on purpose: the frame handed to the saver and the
        # frame returned to the caller stay separate objects, as before.
        self.format_and_save_feats(ret[use_cols])

        return ret[use_cols]
예제 #3
0
    def calc_feature(self, org_train, org_test):
        """Build per-installation session features after fitting the encoders.

        Train mode keeps only the installations of ``org_train`` that have at
        least one row of type ``"Assessment"``.  Test mode first writes
        ``num_correct`` / ``num_incorrect`` flags into ``org_test`` itself
        (rows with event code 4100 or 4110 whose ``event_data`` contains
        ``"true"`` / ``"false"``) and then uses that frame.

        Args:
            org_train: Training event log.
            org_test: Test event log; modified in place in test mode.

        Returns:
            The ``applyParallel`` result restricted to the columns outside
            the exclusion list below.
        """
        if self.datatype == "train":
            assessment_rows = org_train.loc[org_train.type == "Assessment"]
            assess_user = assessment_rows.installation_id.unique()
            df = org_train.loc[org_train.installation_id.isin(assess_user)]
        else:
            # Obtain num_correct/num_incorrect up to the immediately
            # preceding point (translated from the original note).
            is_attempt = org_test.event_code.isin([4100, 4110])
            says_true = org_test["event_data"].str.contains("true")
            org_test.loc[is_attempt & says_true, 'num_correct'] = 1
            says_false = org_test["event_data"].str.contains("false")
            org_test.loc[is_attempt & says_false, 'num_incorrect'] = 1
            df = org_test

        # Fit/load the encoding information before computing the features.
        self.get_encoder(org_train, org_test)

        grouped = df.groupby("installation_id")
        ret = applyParallel(grouped, self.ins_id_sessions)

        excluded = (
            "accuracy", "accuracy_group", "cum_accuracy", "title", "type",
            "event_code", "gs_max_time",
        )
        use_cols = [c for c in ret.columns if c not in excluded]

        self.format_and_save_feats(ret[use_cols])
        return ret[use_cols]
예제 #4
0
    def calc_feature(self, org_train, org_test):
        """Count sessions per installation and return them as int32 features.

        Train mode keeps only the installations of ``org_train`` that have at
        least one row of type ``"Assessment"``; test mode uses ``org_test``
        unchanged.

        Args:
            org_train: Training event log.
            org_test: Test event log.

        Returns:
            The ``applyParallel`` result (``self.count_sessions`` per
            ``installation_id`` group) without the ``title``, ``type``,
            ``event_code`` and ``gs_max_time`` columns.
        """
        if self.datatype == "train":
            assessment_rows = org_train.loc[org_train.type == "Assessment"]
            assess_user = assessment_rows.installation_id.unique()
            df = org_train.loc[org_train.installation_id.isin(assess_user)]
        else:
            # Obtain num_correct/num_incorrect up to the immediately
            # preceding point (translated from the original note).
            df = org_test

        grouped = df.groupby("installation_id")
        ret = applyParallel(grouped, self.count_sessions)

        # Every column except the identifier/descriptor ones is filled with
        # 0 where missing and cast to int32.
        descriptor_cols = ("game_session", "installation_id", "title", "type")
        count_cols = [c for c in ret.columns if c not in descriptor_cols]
        ret[count_cols] = ret[count_cols].fillna(0).astype("int32")

        hidden_cols = ("title", "type", "event_code", "gs_max_time")
        use_cols = [c for c in ret.columns if c not in hidden_cols]

        self.format_and_save_feats(ret[use_cols])

        return ret[use_cols]
예제 #5
0
    def calc_feature(self, df):
        """Flag assessment attempts, keep only those rows, and build features.

        An attempt row is an ``"Assessment"`` row with event code 4100 for any
        title other than "Bird Measurer (Assessment)", or event code 4110 for
        "Bird Measurer (Assessment)".  Attempt rows whose ``event_data``
        contains ``"true"`` get ``num_correct = 1`` and those containing
        ``"false"`` get ``num_incorrect = 1``; these assignments are made on
        the frame passed in.  Only attempt rows are then grouped by
        ``installation_id`` and processed.

        Args:
            df: Event log with ``event_code``, ``title``, ``type``,
                ``event_data`` and ``installation_id`` columns.

        Returns:
            The ``applyParallel`` result for ``self._calc_features``; it is
            also passed to ``self.format_and_save_feats``.
        """
        bird = "Bird Measurer (Assessment)"
        regular_attempt = (df.event_code == 4100) & (df.title != bird)
        bird_attempt = (df.event_code == 4110) & (df.title == bird)
        is_assessment = df.type == "Assessment"
        says_true = df["event_data"].str.contains("true")
        says_false = df["event_data"].str.contains("false")

        target_c_ass_idx = (
            (regular_attempt & is_assessment & says_true)
            | (bird_attempt & is_assessment & says_true)
        )
        target_inc_ass_idx = (
            (regular_attempt & is_assessment & says_false)
            | (bird_attempt & is_assessment & says_false)
        )

        df.loc[target_c_ass_idx, 'num_correct'] = 1
        df.loc[target_inc_ass_idx, 'num_incorrect'] = 1

        # The flag assignments do not touch type/event_code/title, so the
        # masks computed above still describe the attempt rows.
        df = df[is_assessment & (regular_attempt | bird_attempt)]
        print(df.shape)

        grouped = df.groupby("installation_id")
        ret = applyParallel(grouped, self._calc_features)

        self.format_and_save_feats(ret)
        return ret
    def calc_feature(self, org_train, org_test):
        """Flag assessment attempts and build per-session features.

        When ``self.datatype == "train"`` only the rows of ``org_train`` whose
        ``installation_id`` occurs in ``self.train_labels`` are used, and the
        result is inner-joined with ``self.train_labels`` on
        ``installation_id`` and ``game_session``.  Otherwise ``org_test`` is
        used as is and is modified in place by the flag assignments.

        An attempt row is an ``"Assessment"`` row with event code 4100 for any
        title other than "Bird Measurer (Assessment)", or event code 4110 for
        "Bird Measurer (Assessment)".  Attempt rows whose ``event_data``
        contains ``"true"`` get ``num_correct = 1``; those containing
        ``"false"`` get ``num_incorrect = 1``.

        Args:
            org_train: Training event log.
            org_test: Test event log.

        Returns:
            The feature frame produced by ``applyParallel`` with the two
            ``*_acc_gr_-99`` columns removed and missing values filled with 0
            (except in the columns listed in ``keep_nan``).

        Raises:
            KeyError: If ``accum_acc_gr_-99`` or ``prev_acc_gr_-99`` is
                missing from the feature frame.
        """
        if self.datatype == "train":
            # Keep only installations that appear in the training labels.
            labelled_ids = self.train_labels.installation_id.unique()
            df = org_train.loc[org_train.installation_id.isin(labelled_ids)]
        else:
            df = org_test

        bird = "Bird Measurer (Assessment)"
        # Fix: the original wrote `X | Y & is_assessment`; because `&` binds
        # tighter than `|`, the type filter only applied to the Bird Measurer
        # arm and code-4100 rows of any type were flagged.  The filter now
        # applies to both arms.
        is_attempt = (
            ((df.event_code == 4100) & (df.title != bird))
            | ((df.event_code == 4110) & (df.title == bird))
        ) & (df["type"] == "Assessment")

        c_ass_idx = is_attempt & df["event_data"].str.contains("true")
        inc_ass_idx = is_attempt & df["event_data"].str.contains("false")

        df.loc[c_ass_idx, 'num_correct'] = 1
        df.loc[inc_ass_idx, 'num_incorrect'] = 1

        ret = applyParallel(
            df.groupby("installation_id"),
            self.ins_id_sessions)

        del ret["accum_acc_gr_-99"], ret["prev_acc_gr_-99"]

        # Columns whose missing values must stay missing.
        # NOTE(review): the original listed "cum_accuracy" twice; confirm
        # whether a different column (e.g. "accuracy") was intended.
        keep_nan = ("cum_accuracy", "prev_num_corrects", "prev_num_incorrects")
        fill_cols = [c for c in ret.columns if c not in keep_nan]
        ret[fill_cols] = ret[fill_cols].fillna(0)

        if self.datatype == "train":
            ret = pd.merge(
                ret, self.train_labels, how="inner", on=[
                    "installation_id", "game_session"])

        self.format_and_save_feats(ret)

        return ret
예제 #7
0
    def calc_feature(self, org_train, org_test):
        """Compute features per installation and save them.

        Train mode keeps only the rows of ``org_train`` whose
        ``installation_id`` occurs in ``self.train_labels``; test mode uses
        ``org_test`` unchanged.

        Args:
            org_train: Training event log with an ``installation_id`` column.
            org_test: Test event log with an ``installation_id`` column.

        Returns:
            The ``applyParallel`` result for ``self._calc_features``; it is
            also passed to ``self.format_and_save_feats``.
        """
        if self.datatype == "train":
            labelled_ids = self.train_labels.installation_id.unique()
            in_labels = org_train.installation_id.isin(labelled_ids)
            df = org_train.loc[in_labels]
        else:
            # Obtain num_correct/num_incorrect up to the immediately
            # preceding point (translated from the original note).
            df = org_test

        grouped = df.groupby("installation_id")
        ret = applyParallel(grouped, self._calc_features)

        self.format_and_save_feats(ret)
        return ret
    def calc_feature(self, org_train, org_test):
        """Flag assessment attempts and build int32 per-session features.

        The frame selected by ``self.datatype`` (``org_train`` for
        ``"train"``, else ``org_test``) is used unfiltered and is modified in
        place: ``"Assessment"`` rows with event code 4100 (titles other than
        "Bird Measurer (Assessment)") or 4110 ("Bird Measurer (Assessment)")
        get ``num_correct = 1`` when ``event_data`` contains ``"true"`` and
        ``num_incorrect = 1`` when it contains ``"false"``.

        Args:
            org_train: Training event log.
            org_test: Test event log.

        Returns:
            The feature frame without the columns whose name contains
            ``"Assessment"``.  The full frame, including those columns, is
            what gets passed to ``self.format_and_save_feats``.
        """
        # Obtain num_correct/num_incorrect up to the immediately preceding
        # point (translated from the original note).
        df = org_train if self.datatype == "train" else org_test

        bird = "Bird Measurer (Assessment)"
        regular_attempt = ((df.event_code == 4100)
                           & (df.type == "Assessment")
                           & (df.title != bird))
        bird_attempt = ((df.event_code == 4110)
                        & (df.type == "Assessment")
                        & (df.title == bird))
        says_true = df["event_data"].str.contains("true")
        says_false = df["event_data"].str.contains("false")

        c_ass_idx = (regular_attempt & says_true) | (bird_attempt & says_true)
        inc_ass_idx = ((regular_attempt & says_false)
                       | (bird_attempt & says_false))

        df.loc[c_ass_idx, 'num_correct'] = 1
        df.loc[inc_ass_idx, 'num_incorrect'] = 1

        grouped = df.groupby("installation_id")
        ret = applyParallel(grouped, self.ins_id_sessions)

        # Everything except the label/identifier columns is filled with 0
        # where missing and cast to int32.
        untouched = (
            "accuracy", "accuracy_group", "cum_accuracy", "game_session",
            "installation_id", "title", "type",
        )
        ret_col = [c for c in ret.columns if c not in untouched]
        ret[ret_col] = ret[ret_col].fillna(0).astype("int32")

        # Saved before the column filter, so the saved frame is unfiltered.
        self.format_and_save_feats(ret)

        use_cols = [c for c in ret.columns if "Assessment" not in c]

        return ret[use_cols]
    def calc_feature(self, org_train, org_test):
        """Flag attempt events and build int32 per-session features.

        Train mode keeps only the rows of ``org_train`` whose
        ``installation_id`` occurs in ``self.train_labels``; test mode uses
        ``org_test`` as is and modifies it in place.

        Rows with event code 4100 (titles other than
        "Bird Measurer (Assessment)") or 4110 ("Bird Measurer (Assessment)")
        get ``num_correct = 1`` when ``event_data`` contains ``"true"`` and
        ``num_incorrect = 1`` when it contains ``"false"``.

        NOTE(review): unlike some sibling variants, the masks here do not
        check ``type == "Assessment"``; confirm that this is intended.

        Args:
            org_train: Training event log.
            org_test: Test event log.

        Returns:
            The feature frame without the columns whose name contains
            ``"Assessment"``; the same frame is passed to
            ``self.format_and_save_feats``.
        """
        if self.datatype == "train":
            labelled_ids = self.train_labels.installation_id.unique()
            df = org_train.loc[org_train.installation_id.isin(labelled_ids)]
        else:
            # Obtain num_correct/num_incorrect up to the immediately
            # preceding point (translated from the original note).
            df = org_test

        bird = "Bird Measurer (Assessment)"
        regular_attempt = (df.event_code == 4100) & (df.title != bird)
        bird_attempt = (df.event_code == 4110) & (df.title == bird)
        says_true = df["event_data"].str.contains("true")
        says_false = df["event_data"].str.contains("false")

        c_ass_idx = (regular_attempt & says_true) | (bird_attempt & says_true)
        inc_ass_idx = ((regular_attempt & says_false)
                       | (bird_attempt & says_false))

        df.loc[c_ass_idx, 'num_correct'] = 1
        df.loc[inc_ass_idx, 'num_incorrect'] = 1

        grouped = df.groupby("installation_id")
        ret = applyParallel(grouped, self.ins_id_sessions)

        # Everything except the label/identifier columns is filled with 0
        # where missing and cast to int32.
        untouched = (
            "accuracy", "accuracy_group", "cum_accuracy", "game_session",
            "installation_id", "title", "type",
        )
        ret_col = [c for c in ret.columns if c not in untouched]
        ret[ret_col] = ret[ret_col].fillna(0).astype("int32")

        use_cols = [c for c in ret.columns if "Assessment" not in c]
        ret = ret[use_cols]

        self.format_and_save_feats(ret)

        return ret