def merge_by_year(dataframe1, dataframe2, year):
    '''Merge the income data of one year with the country data set.

    Args:
        dataframe1: income table. The row for the requested year is looked up
            by its integer label; the column labels are used as the country
            key of the merge (assumed to be country names - confirm against
            the code that loads this table).
        dataframe2: table that has a 'Country' column to join on.
        year: year to select. May be an int or anything RepresentsInt
            accepts (e.g. a numeric string); it is converted with int().

    Returns:
        A DataFrame holding the inner join of the selected year's values
        (column 'Income') with dataframe2 on 'Country', with every row that
        contains a NaN removed.

    Raises:
        invalid_input: if RepresentsInt(year) reports that year is not an
            integer.
        invalid_year: if the year is outside the range 1800-2012.
    '''
    if not RepresentsInt(year):
        raise invalid_input()

    year_number = int(year)
    if year_number > 2012 or year_number < 1800:
        raise invalid_year()

    # Index with the converted integer (the same way display_distribution
    # does) so that a numeric string such as '2000' finds the row, and use
    # .loc because .ix is no longer available in current pandas.
    # rename/rename_axis return new objects, so the labels of the caller's
    # dataframe1 are left untouched.
    income = (
        dataframe1.loc[year_number, :]
        .rename('Income')
        .rename_axis('Country')
    )

    merged = pd.merge(
        income.to_frame(),
        dataframe2,
        left_index=True,
        right_on='Country',
        how='inner',
    )
    # Drop the rows that contain a NaN value.
    return merged.dropna()
def display_distribution(year, dataframe):
    '''Show a histogram of income per person across countries for one year.

    Args:
        year: year to plot. May be an int or anything RepresentsInt accepts
            (e.g. a numeric string); it is converted with int().
        dataframe: income table. The row for the requested year is looked up
            by its integer label (assumed to hold one column per country -
            confirm against the code that loads this table).

    Returns:
        None. The figure is displayed with plt.show().

    Raises:
        invalid_input: if RepresentsInt(year) reports that year is not an
            integer.
        invalid_year: if the year is outside the range 1800-2012.
    '''
    # Check that the input year can be converted to an integer.
    if not RepresentsInt(year):
        raise invalid_input()

    # Check that the input year is in the supported range.
    year_number = int(year)
    if year_number > 2012 or year_number < 1800:
        raise invalid_year()

    # Select the data of the given year by label; .loc replaces .ix, which is
    # no longer available in current pandas.
    year_distribution = dataframe.loc[year_number, :]

    plt.figure(figsize=(8, 8))
    # Drop the NaN values before binning them into the histogram.
    year_distribution.dropna().hist()
    plt.xlabel('Income per person')
    plt.ylabel('Frequency')
    plt.yticks(fontsize=10)
    plt.title('Income distribution of per person in ' + str(year))
    plt.show()