def analyze_numlangedition_per_profession(self):
        """Plot the mean number of language editions per profession, by gender.

        Splits ``self.people`` on its ``"class"`` column, calls
        ``self.analyze_num_lang_edition`` once per class and passes the
        collected statistics to ``ut.plot_shaded_lines`` together with the
        path ``img/numedition_gender_deacdes.png``.

        Each result of ``analyze_num_lang_edition`` is assumed to be a mapping
        keyed by ``class``, ``mean-men``, ``sem-men``, ``mean-women`` and
        ``sem-women`` -- TODO confirm against that method.
        """
        columns = ["class", "mean-men", "sem-men", "mean-women", "sem-women"]
        print(self.people["class"].nunique())

        # Gather one row per class and build the frame once at the end:
        # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
        rows = []
        # dropna(): a missing class can never match the equality filter below
        # and has no ``split`` method, so skip it instead of crashing.
        for pclass in self.people["class"].dropna().unique():
            people = self.people[self.people["class"] == pclass]

            print(pclass)
            # Keep only the last "/"-separated segment of the class value.
            classname = pclass.split("/")[-1]
            print(classname)
            rows.append(self.analyze_num_lang_edition(classname, people))

        res = pd.DataFrame(rows, columns=columns)

        # NOTE(review): assumes plot_shaded_lines takes
        # (x, y1, y2, err1, err2, ylabel, xlabel, filename) -- confirm in ut.
        # The men's error band uses "sem-men"; previously "sem-women" was
        # passed for both series.
        ut.plot_shaded_lines(
            res["class"].values,
            res["mean-women"].values,
            res["mean-men"].values,
            res["sem-women"].values,
            res["sem-men"].values,
            "mean num editions",
            "professions",
            "img/numedition_gender_deacdes.png",
        )
Exemplo n.º 2
0
    def analyze_numlangedition_per_profession(self):
        """Plot the mean number of language editions per profession, by gender.

        For every distinct value in the ``"class"`` column of ``self.people``
        the matching rows are passed to ``self.analyze_num_lang_edition``;
        the per-class results are then drawn with ``ut.plot_shaded_lines``,
        which receives the path ``img/numedition_gender_deacdes.png``.

        The per-class result is assumed to be a mapping with the keys
        ``class``, ``mean-men``, ``sem-men``, ``mean-women`` and
        ``sem-women`` -- TODO confirm against ``analyze_num_lang_edition``.
        """
        columns = ['class', 'mean-men', 'sem-men', 'mean-women', 'sem-women']
        print(self.people["class"].nunique())

        # DataFrame.append no longer exists in pandas >= 2.0, so the rows are
        # accumulated in a list and turned into a frame in one step.
        rows = []
        # Missing classes are skipped: NaN never equals itself in the filter
        # below and cannot be split into path segments.
        for pclass in self.people["class"].dropna().unique():
            people = self.people[self.people["class"] == pclass]

            print(pclass)
            # The label is the part after the last "/" of the class value.
            classname = pclass.split("/")[-1]
            print(classname)
            rows.append(self.analyze_num_lang_edition(classname, people))

        res = pd.DataFrame(rows, columns=columns)

        # NOTE(review): assumes the helper's signature is
        # (x, y1, y2, err1, err2, ylabel, xlabel, filename) -- confirm in ut.
        # "sem-men" is now used for the men's series; "sem-women" had been
        # passed twice.
        ut.plot_shaded_lines(res["class"].values, res["mean-women"].values,
                             res["mean-men"].values, res["sem-women"].values,
                             res["sem-men"].values, 'mean num editions',
                             'professions',
                             'img/numedition_gender_deacdes.png')
    def analyze_numlangedition_per_decade(self):
        """Plot the mean number of language editions per birth decade, by gender.

        ``self.people`` is bucketed on ``birth_year``: one bucket for
        ``[0, 1000)`` labelled ``"0-1000"``, then ten-year buckets
        ``[start, start + 10)`` for ``start`` = 1000, 1010, ..., 2010. Each
        bucket is passed to ``self.analyze_num_lang_edition`` and the
        collected results are drawn with ``ut.plot_shaded_lines``. The output
        path is built from ``self.pre`` and ``self.post``, which must be
        strings because they are concatenated into it.

        Each bucket result is assumed to be a mapping keyed by ``class``,
        ``mean-men``, ``sem-men``, ``mean-women`` and ``sem-women`` -- TODO
        confirm against ``analyze_num_lang_edition``.
        """
        interval = 10
        first_decade = 1000
        columns = ["class", "mean-men", "sem-men", "mean-women", "sem-women"]

        print(self.people.shape)
        print(self.people.head(n=1))

        birth_year = self.people.birth_year

        # Rows are collected in a list and converted once: DataFrame.append
        # was deprecated in pandas 1.4 and removed in 2.0.
        rows = []

        # Everyone born in [0, 1000) is summarised in a single bucket.
        people = self.people[(birth_year < first_decade) & (birth_year >= 0)]
        print(people.shape)
        rows.append(self.analyze_num_lang_edition("0-1000", people))

        # The stop value 2015 makes 2010-2020 the last bucket.
        for decade in range(first_decade, 2015, interval):
            end = decade + interval

            people = self.people[(birth_year >= decade) & (birth_year < end)]
            print(decade)
            print(end)
            print(people.shape)
            rows.append(self.analyze_num_lang_edition(str(decade) + "-" + str(end), people))

        res = pd.DataFrame(rows, columns=columns)

        # NOTE(review): assumes plot_shaded_lines takes
        # (x, y1, y2, err1, err2, ylabel, xlabel, filename) -- confirm in ut.
        # The men's error band uses "sem-men"; previously "sem-women" was
        # passed for both series.
        ut.plot_shaded_lines(
            res["class"].values,
            res["mean-women"].values,
            res["mean-men"].values,
            res["sem-women"].values,
            res["sem-men"].values,
            "Mean Num Editions",
            "Birth Year",
            "img/numedition_gender_deacdes" + self.pre + "-" + self.post + ".png",
        )
Exemplo n.º 4
0
    def analyze_numlangedition_per_decade(self):
        """Plot the mean number of language editions per birth decade, by gender.

        Rows of ``self.people`` are grouped by ``birth_year`` into one
        ``[0, 1000)`` bucket (label ``"0-1000"``) followed by ten-year
        buckets ``[start, start + 10)`` starting at 1000 and ending with
        2010-2020. ``self.analyze_num_lang_edition`` is called for each
        bucket and the results go to ``ut.plot_shaded_lines``. ``self.pre``
        and ``self.post`` are concatenated into the output path, so they
        must be strings.

        The result for a bucket is assumed to be a mapping with the keys
        ``class``, ``mean-men``, ``sem-men``, ``mean-women`` and
        ``sem-women`` -- TODO confirm against ``analyze_num_lang_edition``.
        """
        interval = 10
        first_decade = 1000
        columns = ['class', 'mean-men', 'sem-men', 'mean-women', 'sem-women']

        print(self.people.shape)
        print(self.people.head(n=1))

        birth_year = self.people.birth_year

        # DataFrame.append no longer exists in pandas >= 2.0, so the rows are
        # accumulated in a list and turned into a frame in one step.
        rows = []

        # Single bucket for all birth years in [0, 1000).
        people = self.people[(birth_year < first_decade)
                             & (birth_year >= 0)]
        print(people.shape)
        rows.append(self.analyze_num_lang_edition("0-1000", people))

        # range(..., 2015, 10) yields 1000, 1010, ..., 2010.
        for decade in range(first_decade, 2015, interval):
            end = decade + interval

            people = self.people[(birth_year >= decade)
                                 & (birth_year < end)]
            print(decade)
            print(end)
            print(people.shape)
            rows.append(self.analyze_num_lang_edition(
                str(decade) + "-" + str(end), people))

        res = pd.DataFrame(rows, columns=columns)

        # NOTE(review): assumes the helper's signature is
        # (x, y1, y2, err1, err2, ylabel, xlabel, filename) -- confirm in ut.
        # "sem-men" is now used for the men's series; "sem-women" had been
        # passed twice.
        ut.plot_shaded_lines(
            res["class"].values, res["mean-women"].values,
            res["mean-men"].values, res["sem-women"].values,
            res["sem-men"].values, 'Mean Num Editions', 'Birth Year',
            'img/numedition_gender_deacdes' + self.pre + "-" + self.post +
            '.png')