Python minimal_dtype示例，til.data.utils.utils.minimal_dtype Python示例

示例#1

0

显示文件

文件： Destinie.py 项目： TaxIPP-Life/Til

        def _lecture_BioFam():
            """Load Destinie's ``BioFam.txt`` into a family-links table.

            The file is read with ``;`` as separator and without header. Rows
            whose ``id`` field contains ``'Fin'`` delimit successive years: they
            are dropped, and every remaining row gets a ``period`` equal to
            ``self.survey_year`` plus the number of delimiter rows located
            before it.

            ``id`` and the person references (``pere``, ``mere``, ``partner``,
            ``enf1``..``enf6``) are shifted by -1; negative or missing values
            are turned into NaN before the table goes through ``minimal_dtype``.

            Returns:
                The output of ``minimal_dtype`` with ``civilstate`` recoded
                (2 -> 1, 1 -> 2, 4 -> 3, 3 -> 4, 5 -> 5).
            """
            path = os.path.join(path_data_destinie, 'BioFam.txt')
            BioFam = read_table(path, sep=';',
                                   header=None, names=['id', 'pere', 'mere', 'civilstate', 'partner',
                                                       'enf1', 'enf2', 'enf3', 'enf4', 'enf5', 'enf6'])
            # Delimiter rows mark the change of year
            delimiters = BioFam['id'].str.contains('Fin')
            annee = BioFam[delimiters].index.tolist()  # index of every delimiter row
            annee = [-1] + annee  # sentinel so the first segment is handled like the others
            # Build the period column: one year per segment between two delimiters
            year0 = self.survey_year
            period = []
            for k, (previous, current) in enumerate(zip(annee[:-1], annee[1:])):
                period.extend([year0 + k] * (current - 1 - previous))

            BioFam = BioFam[~delimiters].copy()
            BioFam['period'] = period
            list_enf = ['enf1', 'enf2', 'enf3', 'enf4', 'enf5', 'enf6']
            BioFam[list_enf + ['pere', 'mere', 'partner']] -= 1
            BioFam.loc[:,'id'] = BioFam.loc[:,'id'].astype(int) - 1
            # Any negative reference is treated as "no link"
            for var in ['pere', 'mere', 'partner'] + list_enf:
                BioFam.loc[BioFam[var] < 0, var] = -1
            BioFam = BioFam.fillna(-1)
            BioFam.replace(-1, np.nan, inplace=True)
            BioFam = minimal_dtype(BioFam)
            # Assign the result back instead of replacing in place on a column
            # selection, which is not guaranteed to modify the frame itself.
            BioFam['civilstate'] = BioFam['civilstate'].replace([2, 1, 4, 3, 5], [1, 2, 3, 4, 5])
            return BioFam

示例#2

0

显示文件

文件： Destinie.py 项目： TaxIPP-Life/Til

        def _BioEmp_in_2():
            """Read ``BioEmp.txt`` and split it into an individual and a career table.

            The file holds three consecutive rows per individual: row ``3 * x``
            carries time-invariant information, row ``3 * x + 1`` the employment
            statuses and row ``3 * x + 2`` the wages.

            Returns:
                tuple: ``(ind, emp)``. ``ind`` keeps the columns ``sexe``,
                ``naiss``, ``findet``, ``tx_prime_fct`` and ``id``. ``emp`` is the
                output of ``minimal_dtype`` applied to a long table with the
                columns ``id``, ``period``, ``workstate`` and ``sali``.
            """
            longueur_carriere = 106 #self.max_dur
            # TODO: review the colnames of BioEmp: remove them?
            colnames = list(range(longueur_carriere))
            path = os.path.join(path_data_destinie, 'BioEmp.txt')
            BioEmp = read_table(path, sep=';',
                                   header=None, names=colnames)
            # Floor division keeps the count and the ids integral on Python 3 as
            # well (``/`` would give floats and make ``range(taille)`` fail).
            taille = len(BioEmp) // 3
            BioEmp['id'] = BioEmp.index // 3

            # selection0: time-invariant information about individuals
            # (identifier, sex, birth date and age at end of studies)
            selection0 = [3 * x for x in range(taille)]
            ind = BioEmp.iloc[selection0].copy()
            ind.reset_index(inplace=True)
            ind.rename(columns={1:'sexe', 2:'naiss', 3:'findet', 4:'tx_prime_fct'}, inplace=True)

            # Keep only the renamed columns; the first three are cast to int
            for column in ind.columns:
                if column in ['sexe', 'naiss', 'findet']:
                    ind[column] = ind[column].astype(int)
                elif column in ['tx_prime_fct']:
                    continue
                else:
                    del ind[column]

            ind['id'] = ind.index

            # selection1: employment statuses, reshaped to one row per (id, period)
            selection1 = [3 * x + 1 for x in range(taille)]
            statut = BioEmp.iloc[selection1].copy()
            statut = np.array(statut.set_index('id').stack().reset_index())

            # selection2: wages, reshaped the same way; only the values are kept
            selection2 = [3 * x + 2 for x in range(taille)]
            sal = BioEmp.iloc[selection2].copy()
            sal = sal.set_index('id').stack().reset_index()
            sal = sal[0]

            # Merge selections 1 and 2 side by side
            emp = np.zeros((len(sal), 4))
            emp[:, 0:3] = statut
            emp[:, 3] = sal
            emp = DataFrame(emp, columns=['id', 'period', 'workstate', 'sali'])
            # Convert to the minimal format (-1 is treated as missing)
            emp = emp.fillna(np.nan).replace(-1, np.nan)
            emp = minimal_dtype(emp)
            return ind, emp

示例#3

0

显示文件

文件： Destinie.py 项目： leeseungho90/Til

        def _work_on_futur(futur, ind):
            """Add the death-date information to ``futur``.

            Args:
                futur: table with at least the columns ``id``, ``period`` and
                    ``workstate``.
                ind: not used by the code visible in this function.

            Returns:
                ``futur`` with a ``death`` column, -1 replaced by NaN, passed
                through ``minimal_dtype``.
            """
            # One row per individual is added to record the death (only period != -1)

            def __deces_indicated_lastyearoflife():
                # NOTE(review): this helper is not called in this function. As
                # written it assigns ``futur`` before reading it, so calling it
                # would raise UnboundLocalError, and ``deces`` is not defined in
                # the visible code.
                dead = DataFrame(deces)
                dead['id'] = dead.index
                dead['death'] = dead['period'] * 100 + 1

                futur = concat([futur, dead], axis=0, ignore_index=True)
                futur.fillna(-1, inplace=True)
                futur = futur.sort(['id', 'period', 'dead']).reset_index().drop('index', 1)
                futur.drop_duplicates(['id', 'period'], inplace=True)
                dead = futur[['id', 'period']].drop_duplicates('id', take_last=True).index
                futur['deces'] = -1
                futur.loc[dead, 'deces'] = 1
                futur = futur.sort(['period', 'id']).reset_index().drop(['index', 'dead'], 1)
                return futur

            def __death_unic_event(futur):
                """Flag death on the last row of each individual.

                ``death`` is -1 everywhere except on the last row of an ``id``
                when ``workstate == 0`` or ``period != 2060``; there it is
                ``100 * period + 1``, plus 1 more when ``workstate != 0``.
                """
                # NOTE(review): ``DataFrame.sort`` and ``take_last`` are legacy
                # pandas spellings; confirm the pandas version this runs on.
                futur = futur.sort(['id', 'period'])
                no_last = futur.duplicated('id', take_last=True)
                futur['death'] = -1
                cond_death = ~no_last & ((futur['workstate'] == 0) | (futur['period'] != 2060))
                futur.loc[cond_death, 'death'] = 100 * futur.loc[cond_death, 'period'] + 1
                futur.loc[(futur['workstate'] != 0) & (futur['death'] != -1), 'death' ] += 1
                add_lines = futur.loc[(futur['period']> futur['death']) & (futur['death'] != -1), 'id']
                if len(add_lines) != 0:
                    # TODO: add a row when no row is associated with the date of death.
                    # The call form of print works on both Python 2 and Python 3.
                    print(len(add_lines))
                    pdb.set_trace()

                return futur

            futur = __death_unic_event(futur)

            # Minimal types
            futur.replace(-1, np.nan, inplace=True)
            futur = minimal_dtype(futur)
            return futur

示例#4

0

显示文件

文件： Patrimoine.py 项目： TaxIPP-Life/til-core

    def matching_par_enf(self):
        """Match individuals with parents living outside their household.

        Individuals with ``per1e == 2`` or ``mer1e == 2`` are matched against
        the rows of ``self.child_out_of_house`` in three passes (both parents,
        mother only, father only) using ``Matching`` with a quadratic score.
        The ``pere`` / ``mere`` links found are written into the individual
        table, then links are removed when the parent is at most ``12 * 14``
        ``agem`` units older than the child, or when the partner has the same
        parent while living in another household (``men``).

        Side effects: calls ``self._check_links``, stores the result of
        ``minimal_dtype`` in ``self.ind`` and drops working variables through
        ``self.drop_variable``.
        """
        ind = self.ind
        ind = ind.fillna(-1)
        ind.index = ind['id']
        child_out_of_house = self.child_out_of_house
        ## Children looking for a parent living outside their household
        cond_enf_look_par = (ind['per1e'] == 2) | (ind['mer1e'] == 2)
        enf_look_par = ind[cond_enf_look_par].copy()
        # Note: values used to be set to zero when no parent is looked for;
        # we now assume things are done carefully enough upstream.
        enf_look_par['dip6'] = recode(enf_look_par['dip14'], [[30,5], [41,4], [43,3], [50,2], [60,1]] , method='geq')
        enf_look_par['classif'] = recode(enf_look_par['classif'], [ [[1,2,3],4], [[4,5],2], [[6,7],1], [[8,9], 3], [[10],0]], method='isin')
        ## Number of children
        # -- Inside the household
        nb_enf_mere_dom = ind.groupby('mere').size()
        nb_enf_pere_dom= ind.groupby('pere').size()
        # Stack the counts of mothers and fathers, dropping the missing parent (= -1)
        enf_tot_dom = concat([nb_enf_mere_dom, nb_enf_pere_dom], axis=0)
        enf_tot_dom = enf_tot_dom.drop([-1])
        # -- Outside the household
        nb_enf_mere_hdom = child_out_of_house.groupby('mere').size()
        nb_enf_pere_hdom = child_out_of_house.groupby('pere').size()
        enf_tot_hdom = concat([nb_enf_mere_hdom, nb_enf_pere_hdom], axis=0)
        enf_tot_hdom = enf_tot_hdom.drop([-1])

        enf_tot = concat([enf_tot_dom, enf_tot_hdom], axis = 1).fillna(0)
        enf_tot = enf_tot[0] + enf_tot[1]
        # Keep the parents having children (enf_tot) among those looking for a parent (enf_look_par)
        enf_tot = (enf_tot.loc[enf_tot.index.isin(enf_look_par.index)].astype(int)).copy()
        enf_look_par.index = enf_look_par['id']
        enf_look_par['nb_enf'] = 0
        enf_look_par.loc[enf_tot.index.values, 'nb_enf'] = enf_tot
        # Note: the score cannot take any form; words must be preceded by a space or, at worst, a parenthesis
        var_match = ['jepnais', 'situa', 'nb_enf', 'anais', 'classif', 'couple', 'dip6', 'jemnais', 'jemprof', 'sexe']
        #TODO: handle null values, for now it is very ugly

        #TODO: get a proper distance; a large weight is put on age, otherwise
        # the matched parents and children end up with inconsistent ages
        score = "- 1000 * (other.anais - anais) **2 - 1.0 * (other.situa - situa) **2 " + \
        "- 0.5 * (other.sexe - sexe) **2 - 1.0 * (other.dip6 - dip6) **2 " + \
        " - 1.0 * (other.nb_enf - nb_enf) **2"

        # step 1: both parents alive
        cond1_enf = (enf_look_par['per1e'] == 2) & (enf_look_par['mer1e'] == 2)
        cond1_par = (child_out_of_house['pere'] != -1) & (child_out_of_house['mere'] != -1)
        # TODO: if the variables are modified earlier, child_out_of_house1 can be used directly
        # because of the append done earlier, this actually takes the first rows of child_out_of_house
        match1 = Matching(enf_look_par.loc[cond1_enf, var_match],
                          child_out_of_house.loc[cond1_par, var_match], score)
        parent_found1 = match1.evaluate(orderby=['anais'], method='cells')
        # NOTE(review): pandas aligns a DataFrame right-hand side on its index
        # labels in a .loc assignment; the labels here come from
        # child_out_of_house while the target rows are labelled by individual
        # id -- confirm this alignment is intended (same pattern below).
        ind.loc[parent_found1.index.values, ['pere', 'mere']] = child_out_of_house.loc[parent_found1.values, ['pere', 'mere']]

        # step 2: only the mother alive
        enf_look_par.loc[parent_found1.index, ['pere', 'mere']] = child_out_of_house.loc[parent_found1, ['pere', 'mere']]
        cond2_enf = ((enf_look_par['mere'] == -1)) & (enf_look_par['mer1e'] == 2)
        cond2_par = ~child_out_of_house.index.isin(parent_found1) & (child_out_of_house['mere'] != -1)
        match2 = Matching(enf_look_par.loc[cond2_enf, var_match],
                          child_out_of_house.loc[cond2_par, var_match], score)
        parent_found2 = match2.evaluate(orderby=None, method='cells')
        ind.loc[parent_found2.index, ['mere']] = child_out_of_house.loc[parent_found2, ['mere']]

        # step 3: only the father alive
        enf_look_par.loc[parent_found2.index, ['pere', 'mere']] = child_out_of_house.loc[parent_found2, ['pere', 'mere']]
        cond3_enf = ((enf_look_par['pere'] == -1)) & (enf_look_par['per1e'] == 2)
        cond3_par = ~child_out_of_house.index.isin(parent_found1) & (child_out_of_house['pere'] != -1)

        # TODO: change the score to get a more obvious link between father and mother
        match3 = Matching(enf_look_par.loc[cond3_enf, var_match],
                          child_out_of_house.loc[cond3_par, var_match], score)
        parent_found3 = match3.evaluate(orderby=None, method='cells')
        ind.loc[parent_found3.index, ['pere']] = child_out_of_house.loc[parent_found3, ['pere']]

        print(" au départ on fait " + str(len(parent_found1) + len(parent_found2) + len(parent_found3)) + " match enfant-parent hors dom")
        # Remove the invalid matches: attach to each individual the
        # characteristics of its partner, father and mother
        to_check = ind[['id', 'agem', 'sexe', 'men', 'partner', 'pere', 'mere', 'lienpref']]
        tab = to_check.copy()
        for lien in ['partner', 'pere', 'mere']:
            tab = tab.merge(to_check, left_on=lien, right_on='id', suffixes=('', '_' + lien), how='left', sort=False)
        tab.index = tab['id']

        for parent in ['pere', 'mere']:
            # Parent must be more than 12 * 14 agem units older than the child
            # (presumably months, i.e. 14 years -- confirm the unit of agem)
            diff_age = (tab['agem_' + parent] - tab['agem'])
            cond = diff_age <= 12*14
            print( "on retire " + str(sum(cond)) + " lien enfant " + parent +
                   " car l'âge n'était pas le bon")
            ind.loc[cond, parent] = -1

            # Partner sharing the same parent while living in another household
            cond = (tab['partner'] > -1) & (tab[parent] > -1) & \
                    (tab[parent] == tab[parent + '_partner']) & \
                    (tab['men'] != tab['men_' + parent])
            print( "on retire " + str(sum(cond)) + " lien enfant " + parent +
                   " car le partner a le même parent")
            ind.loc[(cond[cond]).index, parent] = -1

        self._check_links(ind)
        self.ind = minimal_dtype(ind)
        # Household columns starting with 'hod' are dropped along with the
        # working variables of the individual table
        men_columns = self.men.columns.tolist()
        enfants_hdom = [x for x in men_columns if x[:3]=='hod']
        self.drop_variable({'ind':['enf', 'per1e', 'mer1e', 'grandpar'] + ['jepnais', 'jemnais', 'jemprof'], 'men':enfants_hdom})

示例#5

0

显示文件

文件： Patrimoine.py 项目： leeseungho90/Til

    def matching_par_enf(self):
        """Match individuals with parents living outside their household.

        Individuals with ``per1e == 2`` or ``mer1e == 2`` are matched against
        the rows of ``self.child_out_of_house`` in three passes (both parents,
        mother only, father only) using ``Matching`` with a quadratic score,
        and the ``pere`` / ``mere`` links found are written into the
        individual table.

        Side effects: stores the result of ``minimal_dtype`` in ``self.ind``
        and drops working variables through ``self.drop_variable``.
        """
        ind = self.ind
        ind = ind.fillna(-1)
        ind.index = ind['id']
        child_out_of_house = self.child_out_of_house
        ## Children looking for a parent living outside their household
        cond_enf_look_par = (ind['per1e']==2) | (ind['mer1e']==2)
        # Work on an explicit copy: the selection is modified below
        enf_look_par = ind[cond_enf_look_par].copy()
        # Note: values used to be set to zero when no parent is looked for;
        # we now assume things are done carefully enough upstream.

        recode(enf_look_par, 'dip14', 'dip6', [[30,5], [41,4], [43,3], [50,2], [60,1]] , method='geq')
        recode(enf_look_par, 'classif', 'classif2', [ [[1,2,3],4], [[4,5],2], [[6,7],1], [[8,9], 3], [[10],0]], method='isin')
        enf_look_par.loc[:,'classif'] = enf_look_par.loc[:,'classif2']

        ## Number of children
        # -- Inside the household
        nb_enf_mere_dom = ind.groupby('mere').size()
        nb_enf_pere_dom= ind.groupby('pere').size()
        # Stack the counts of mothers and fathers, dropping the missing parent (= -1)
        enf_tot_dom = concat([nb_enf_mere_dom, nb_enf_pere_dom], axis=0)
        enf_tot_dom = enf_tot_dom.drop([-1])

        # -- Outside the household
        nb_enf_mere_hdom = child_out_of_house.groupby('mere').size()
        nb_enf_pere_hdom = child_out_of_house.groupby('pere').size()
        enf_tot_hdom = concat([nb_enf_mere_hdom, nb_enf_pere_hdom], axis=0)
        enf_tot_hdom = enf_tot_hdom.drop([-1])

        enf_tot = concat([enf_tot_dom, enf_tot_hdom], axis = 1).fillna(0)
        enf_tot = enf_tot[0] + enf_tot[1]
        # Keep the parents having children (enf_tot) among those looking for a parent (enf_look_par)
        enf_tot = enf_tot.loc[enf_tot.index.isin(enf_look_par.index)].astype(int)

        enf_look_par.index = enf_look_par['id']
        enf_look_par['nb_enf'] = 0
        # Single .loc assignment rather than chained indexing, which may write
        # to a temporary object instead of enf_look_par
        enf_look_par.loc[enf_tot.index.values, 'nb_enf'] = enf_tot

        # Note: the score cannot take any form; words must be preceded by a space or, at worst, a parenthesis
        var_match = ['jepnais','situa','nb_enf','anais','classif','couple','dip6', 'jemnais','jemprof','sexe']
        #TODO: handle null values, for now it is very ugly
        #TODO: get a proper distance
        score = "- 1 * (other.anais - anais) **2 - 1.0 * (other.situa - situa) **2 - 0.5 * (other.sexe - sexe) **2 - 1.0 * (other.dip6 - dip6) \
         **2 - 1.0 * (other.nb_enf - nb_enf) **2"

        # step 1: both parents alive
        cond1_enf = (enf_look_par['per1e'] == 2) & (enf_look_par['mer1e'] == 2)
        cond1_par = (child_out_of_house['pere'] != -1) & (child_out_of_house['mere'] != -1)
        # TODO: if the variables are modified earlier, child_out_of_house1 can be used directly

        # because of the append done earlier, this actually takes the first rows of child_out_of_house
        match1 = Matching(enf_look_par.loc[cond1_enf, var_match],
                          child_out_of_house.loc[cond1_par, var_match], score)
        parent_found = match1.evaluate(orderby=None, method='cells')
        # NOTE(review): pandas aligns a DataFrame right-hand side on its index
        # labels in a .loc assignment; the labels here come from
        # child_out_of_house while the target rows are labelled by individual
        # id -- confirm this alignment is intended (same pattern below).
        ind.loc[parent_found.index.values, ['pere','mere']] = child_out_of_house.loc[parent_found.values, ['pere','mere']]

        # step 2: only the mother alive
        enf_look_par.loc[parent_found.index, ['pere','mere']] = child_out_of_house.loc[parent_found, ['pere','mere']]
        cond2_enf = ((enf_look_par['mere'] == -1)) & (enf_look_par['mer1e'] == 2)
        cond2_par = ~child_out_of_house.index.isin(parent_found) & (child_out_of_house['mere'] != -1)
        match2 = Matching(enf_look_par.loc[cond2_enf, var_match],
                          child_out_of_house.loc[cond2_par, var_match], score)
        parent_found2 = match2.evaluate(orderby=None, method='cells')
        ind.loc[parent_found2.index, ['mere']] = child_out_of_house.loc[parent_found2, ['mere']]

        # step 3: only the father alive
        enf_look_par.loc[parent_found2.index, ['pere','mere']] = child_out_of_house.loc[parent_found2, ['pere','mere']]
        cond3_enf = ((enf_look_par['pere'] == -1)) & (enf_look_par['per1e'] == 2)
        cond3_par = ~child_out_of_house.index.isin(parent_found) & (child_out_of_house['pere'] != -1)

        # TODO: change the score to get a more obvious link between father and mother
        match3 = Matching(enf_look_par.loc[cond3_enf, var_match],
                          child_out_of_house.loc[cond3_par, var_match], score)
        parent_found3 = match3.evaluate(orderby=None, method='cells')
        ind.loc[parent_found3.index, ['pere']] = child_out_of_house.loc[parent_found3, ['pere']]

        self.ind = minimal_dtype(ind)
        # Household columns starting with 'hod' are dropped along with the
        # working variables of the individual table
        men_columns = self.men.columns.tolist()
        enfants_hdom = [x for x in men_columns if x[:3]=='hod']
        self.drop_variable({'ind':['enf','per1e','mer1e','grandpar'] + ['jepnais','jemnais','jemprof'], 'men':enfants_hdom})

示例#6

0

显示文件

文件： Patrimoine.py 项目： leeseungho90/Til

    def format_initial(self):
        """Put the household (``men``) and individual (``ind``) tables in their initial format.

        Steps, in order:
          * households are re-indexed from 10 and that index is stored as ``men['id']``;
          * the household id is merged into ``ind`` as ``men`` through ``identmen``;
          * ``age`` is computed and income / career columns are renamed;
          * ``sexe`` is recoded, ``workstate`` is built, ``findet`` becomes
            ``findet - anais`` and ``couple`` is corrected;
          * working variables are dropped and ``ind`` goes through ``minimal_dtype``.

        Results are stored in ``self.men`` and ``self.ind``.

        Raises:
            Exception: if the merge on ``identmen`` changes the number of rows of ``ind``.
        """
        men = self.men
        ind = self.ind
        men.index = range(10, len(men)+ 10)
        men['id'] = men.index
        # Bring the household id into ind as the variable ind['men']
        idmen = men[['id', 'identmen']].rename(columns = {'id': 'men'})
        verif_match = len(ind)
        ind = merge(ind, idmen, on = 'identmen')
        if len(ind) != verif_match:
            raise Exception("On a perdu le lien entre ind et men via identmen")
        ind['id'] = ind.index
        # Gives a more "continuous" age, without a gap from one birth year to the next
        # NOTE(review): on Python 3 ``/`` is a true division, so survey_date/100
        # keeps a fractional part -- confirm which behaviour is wanted.
        age = self.survey_date/100 - ind['anais']
        ind['age'] = (12*age + 11 - ind['mnais'])/12

        dict_rename = {"zsalaires_i":"sali", "zchomage_i":"choi",
        "zpenalir_i":"alr", "zretraites_i":"rsti", "anfinetu":"findet",
        "cyder":"anc", "duree":"xpr"}
        ind = ind.rename(columns=dict_rename)

        def _recode_sexe(sexe):
            """Return ``sexe`` recoded from 1/2 to 0/1 when its maximum is 2."""
            if sexe.max() == 2:
                sexe = sexe.replace(1, 0).replace(2, 1)
            return sexe

        def _work_on_workstate(ind):
            """Build ``workstate`` from ``situa``, ``statut`` and ``classif``.

            The coding follows Destinie and PENSIPP. It should be revisited
            to account for part-time work.

            Returns:
                The ``workstate`` column with missing values set to 1.
            """

            # inactif   <-  1  # chomeur   <-  2   # non_cadre <-  3  # cadre     <-  4
            # fonct_a   <-  5  # fonct_s   <-  6   # indep     <-  7  # avpf      <-  8
            # preret    <-  9
            # situa is used first, then statut, then classif
            list_situa_work = [ [[1,2],3],
                                  [[4],2],
                                  [[5,6,7],1],
                                  [[1,2],3] ]
            recode(ind,'situa','workstate', list_situa_work ,'isin')
            # Note: students (situa == 3) are left without a workstate (NA)
            # early retirement
            ind.loc[ind['preret']==1, 'workstate'] = 9
            # AVPF
            #TODO: AVPF applies when one has no professional activity (or only
            # part-time) and cares for a disabled person (child under 20 or adult).
            # This is done this way for now because it matches PensIPP, but it
            # should be improved; AVPF is really a matter of legislation.
            cond =  (men['paje']==1) | (men['complfam']==1) | (men['allocpar']==1) | (men['asf']==1)
            # NOTE(review): household labels are shifted by +1 here whereas
            # ind['men'] was taken from men['id'] unshifted -- confirm the offset.
            avpf = men.loc[cond,:].index.values + 1
            ind.loc[(ind['men'].isin(avpf)) & (ind['workstate'].isin([1,2])), 'workstate'] = 8
            # public sector, private sector, self-employed
            ind.loc[ind['statut'].isin([1,2]), 'workstate'] = 5
            ind.loc[ind['statut']==7, 'workstate'] =  7
            # executive / non-executive
            ind.loc[(ind['classif']==6)  & (ind['workstate']==5), 'workstate'] = 6
            ind.loc[(ind['classif']==7)  & (ind['workstate']==3), 'workstate'] = 4
            # retirement
            ind.loc[(ind['anais'] < 2009-64)  & (ind['workstate']==1), 'workstate'] = 10
            # Return the filled column; the caller stores it back in the table
            return ind['workstate'].fillna(1)

        def _work_on_couple(ind):
            """Make ``couple`` consistent with the composition of the household.

            Modifies ``ind`` in place and returns it.
            """
            # 1- People declaring themselves married / in a civil union but not in a couple
            statu_mari = ind[['men','couple','civilstate','pacs','lienpref']].fillna(-1)
            # (a) Two such people living in the same household -> in a couple
            prob_couple = (ind['civilstate'].isin([1,5])) & (ind['couple'] == 3)
            # Stays None when step (a) selects nobody, so that the steps below
            # do not fail on an unbound name.
            prob_couple_ident = None
            if sum(prob_couple) != 0:
                statu_marit = statu_mari[prob_couple]
                men_counts = statu_marit['men'].value_counts()
                many_by_men = men_counts[men_counts > 1]
                prob_couple_ident = statu_marit[statu_marit['men'].isin(many_by_men.index.values.tolist())]
                ind.loc[prob_couple_ident.index,'couple'] = 1

            # NOTE(review): steps (b) and (c) compute their own condition but the
            # assignment reuses the rows selected in step (a) -- confirm which
            # rows should actually be updated.
            # (b) Such a person who is the partner of the reference person -> in a couple
            prob_couple = (ind['civilstate'].isin([1,5])) & (ind['couple'] == 3) & (ind['lienpref'] == 1)
            if prob_couple_ident is not None:
                ind.loc[prob_couple_ident.index,'couple'] = 1

            # (c) Such a person who is the reference person, when the only partner
            # declared in the household says they are in a couple -> in a couple
            prob_couple = (ind['civilstate'].isin([1,5])) & (ind['couple'] == 3) & (ind['lienpref'] == 0)
            men_conj = statu_mari[prob_couple]
            men_conj = statu_mari.loc[(statu_mari['men'].isin(men_conj['men'].values))& (statu_mari['lienpref'] == 1), 'men' ].value_counts() == 1
            if prob_couple_ident is not None:
                ind.loc[prob_couple_ident.index,'couple'] = 1

            # 2 - Check that a partner is present in the household when couple == 1
            # and lienpref is 0 or 1
            conj = ind.loc[ind['couple']==1,['men','lienpref','id']]
            # pref stands for "reference person"
            pref0 = conj.loc[conj['lienpref']==0,'men']
            pref1 = conj.loc[conj['lienpref']==1,'men']
            assert sum(~pref1.isin(pref0)) == 0
            conj_hdom = pref0[~pref0.isin(pref1)]
            ind.loc[conj_hdom.index,'couple'] = 2

            # Presence of the son/daughter of the reference person when a
            # daughter-in-law / son-in-law is declared
            pref2 = conj.loc[conj['lienpref']==2,'men']
            pref31 = conj.loc[conj['lienpref']==31,'men']
            assert sum(~pref31.isin(pref2)) == 0
            manque_conj = pref2[~pref2.isin(pref31)]
            ind.loc[manque_conj.index,'couple'] = 2

            return ind

        ind['sexe'] = _recode_sexe(ind['sexe'])
        ind['workstate'] = _work_on_workstate(ind)
        ind['workstate'] = ind['workstate'].fillna(-1).astype(np.int8)
        # findet: 0 means missing; keep the other values and express them
        # relative to the year of birth
        ind['findet'] = ind['findet'].where(ind['findet'] != 0)
        ind['findet'] = ind['findet'] - ind['anais']

        ind = _work_on_couple(ind)
        self.men = men
        self.ind = ind
        self.drop_variable({'men':['identmen','paje','complfam','allocpar','asf'], 'ind':['identmen','preret']})

        # Output in the minimal format (-1 is treated as missing)
        ind = self.ind.fillna(-1).replace(-1,np.nan)
        ind = minimal_dtype(ind)
        self.ind = ind