示例#1
0
    def _fetch_data(self):
        """Fetch the modelling (training) data set.

        Queries ``Pinkunhu2015`` rows of the training county whose
        ``ny_person_income`` is not ``-1`` and whose
        ``person_year_total_income`` lies strictly between 0 and 7000.

        Returns:
            tuple: ``(X, Y)``. ``X`` is a list with one inner list per row,
            holding ``normalize(col, value)`` for every column named in
            ``self.features`` (in that order). ``Y`` is a list with
            ``normalize(self.target, value)`` for the same rows.
        """
        session = get_db_session()
        # All criteria are passed to a single filter() call, exactly as
        # separate positional arguments.
        criteria = (
            Pinkunhu2015.county == 'A县',
            Pinkunhu2015.ny_person_income != -1,
            Pinkunhu2015.person_year_total_income > 0,
            Pinkunhu2015.person_year_total_income < 7000,
        )
        rows = session.query(Pinkunhu2015).filter(*criteria).all()

        X, Y = [], []
        for row in rows:
            # Feature vector first, then the target, for each row.
            X.append([normalize(name, getattr(row, name))
                      for name in self.features])
            Y.append(normalize(self.target, getattr(row, self.target)))

        # NOTE: an earlier experiment tried to screen out suspicious rows by
        # bucketing person_year_total_income into steps of 100 and keeping
        # only buckets with more than 5 rows. It did not improve results and
        # was abandoned.

        return X, Y
示例#2
0
    def _fetch_test_data(self):
        """Fetch the test data set.

        Queries every ``Pinkunhu2015`` row of the test county and builds the
        feature matrix from a fixed list of columns (it does not read
        ``self.features``).

        Returns:
            tuple: ``(X, Y)``. ``X`` is a list with one inner list per row,
            holding ``normalize(col, value)`` for each column listed below.
            ``Y`` is a list with the normalized ``poor_status`` of the same
            rows.
        """
        # Columns used as features, in output order.
        feature_columns = (
            'tv', 'washing_machine', 'fridge', 'reason',
            'is_danger_house', 'is_back_poor', 'is_debt', 'standard',
            'arable_land', 'debt_total', 'living_space',
            'member_count', 'person_year_total_income',
            'year_total_income', 'subsidy_total', 'wood_land',
            'xin_nong_he_total', 'xin_yang_lao_total', 'call_number',
            'bank_name', 'bank_number', 'help_plan',
        )
        target_column = 'poor_status'

        session = get_db_session()
        county_matches = Pinkunhu2015.county == '彝良县'
        rows = session.query(Pinkunhu2015).filter(county_matches).all()

        X, Y = [], []
        for row in rows:
            X.append([normalize(name, getattr(row, name))
                      for name in feature_columns])
            Y.append(normalize(target_column, getattr(row, target_column)))

        return X, Y
示例#3
0
    def _fetch_test_data(self):
        """Fetch the test data set with dummy (one-hot) variables.

        Queries ``Pinkunhu2015`` rows of the test county whose
        ``ny_person_income`` and ``person_year_total_income`` both lie
        strictly between 0 and 7000 (``ny_person_income != -1`` is also
        applied).

        Returns:
            tuple: ``(X, Y)``. ``X`` is a pandas DataFrame built from the
            normalized ``self.features`` columns, where every column named in
            ``self.dummy_features`` has been replaced by its
            ``pd.get_dummies`` columns (prefixed with the column name).
            ``Y`` is a list with ``normalize(self.target, value)`` per row.
        """
        session = get_db_session()
        criteria = (
            Pinkunhu2015.county == 'B县',
            Pinkunhu2015.ny_person_income != -1,
            Pinkunhu2015.person_year_total_income > 0,
            Pinkunhu2015.person_year_total_income < 7000,
            Pinkunhu2015.ny_person_income > 0,
            Pinkunhu2015.ny_person_income < 7000,
        )
        rows = session.query(Pinkunhu2015).filter(*criteria).all()

        X, Y = [], []
        for row in rows:
            X.append([normalize(name, getattr(row, name))
                      for name in self.features])
            Y.append(normalize(self.target, getattr(row, self.target)))

        # Expand each categorical feature into dummy columns.
        frame = pd.DataFrame(X, columns=self.features)
        for feature in self.dummy_features:
            indicator_columns = pd.get_dummies(frame[feature], prefix=feature)
            frame = frame.join(indicator_columns)
        # Drop the original columns now that their dummies are in place.
        frame = frame.drop(self.dummy_features, axis=1)
        X = frame.loc[:]

        return X, Y
示例#4
0
    def _fetch_data(self):
        """Fetch the modelling (training) data set.

        Queries every ``Pinkunhu2015`` row of the training county, with no
        further filtering.

        Returns:
            tuple: ``(X, Y)``. ``X`` is a list with one inner list per row,
            holding ``normalize(col, value)`` for every column named in
            ``self.features``. ``Y`` is a list with
            ``normalize(self.target, value)`` for the same rows.
        """
        session = get_db_session()
        in_training_county = Pinkunhu2015.county == 'A县'
        rows = session.query(Pinkunhu2015).filter(in_training_county).all()

        X, Y = [], []
        for row in rows:
            features = [normalize(name, getattr(row, name))
                        for name in self.features]
            X.append(features)
            label = normalize(self.target, getattr(row, self.target))
            Y.append(label)

        return X, Y
示例#5
0
    def _fetch_test_data(self):
        """Fetch the test data set.

        Queries ``Pinkunhu2015`` rows of the test county whose
        ``ny_person_income`` is not ``-1`` and whose
        ``person_year_total_income`` lies strictly between 0 and 7000.

        Returns:
            tuple: ``(X, Y)``. ``X`` is a list with one inner list per row,
            holding ``normalize(col, value)`` for every column named in
            ``self.features``. ``Y`` is a list with
            ``normalize(self.target, value)`` for the same rows.
        """
        session = get_db_session()
        criteria = (
            Pinkunhu2015.county == 'B县',
            Pinkunhu2015.ny_person_income != -1,
            Pinkunhu2015.person_year_total_income > 0,
            Pinkunhu2015.person_year_total_income < 7000,
        )
        rows = session.query(Pinkunhu2015).filter(*criteria).all()

        X, Y = [], []
        for row in rows:
            features = [normalize(name, getattr(row, name))
                        for name in self.features]
            X.append(features)
            label = normalize(self.target, getattr(row, self.target))
            Y.append(label)

        return X, Y