def test_merge_by_year_3(self):
    """Third check of merge_by_year: year 1800, entry 27 (Mauritius)."""
    country_table = pd.read_csv("countries.csv", sep=",")
    # Read the 'Data' sheet and transpose it before handing it to the merge.
    income_by_year = pd.read_excel(
        "indicator gapminder gdp_per_capita_ppp.xlsx",
        sheetname="Data",
        index_col=0,
    ).transpose()

    merged = data_process_functions.merge_by_year(
        1800, country_table, income_by_year
    )

    row = 27
    self.assertEqual(merged["Country"][row], "Mauritius")
    self.assertEqual(merged["Region"][row], "AFRICA")
    # Income is compared after truncation to an integer.
    self.assertEqual(int(merged["Income"][row]), 799)
def test_merge_by_year_1(self):
    """First check of merge_by_year: year 1800, entry 176 (Venezuela)."""
    country_table = pd.read_csv("./countries.csv", sep=",")
    raw_income = pd.read_excel(
        "./indicator gapminder gdp_per_capita_ppp.xlsx",
        sheetname="Data",
        index_col=0,
    )
    # The merge is given the transposed income table.
    income_by_year = raw_income.transpose()

    merged = data_process_functions.merge_by_year(
        1800, country_table, income_by_year
    )

    row = 176
    self.assertEqual(merged["Country"][row], "Venezuela")
    self.assertEqual(merged["Region"][row], "SOUTH AMERICA")
    # Income is compared after truncation to an integer.
    self.assertEqual(int(merged["Income"][row]), 442)
def test_merge_by_year_case4(self):
    # Case 4: merge for 2012 and check entry 147 (Saint Kitts and Nevis).
    # Load the two input tables; the income sheet is transposed before use.
    country_table = pd.read_csv('countries.csv', sep=',')
    income_by_year = pd.read_excel(
        'indicator gapminder gdp_per_capita_ppp.xlsx',
        sheetname='Data',
        index_col=0,
    ).transpose()

    # Run the merge under test.
    merged = data_process_functions.merge_by_year(
        2012, country_table, income_by_year
    )

    row = 147
    self.assertEqual(merged['Country'][row], 'Saint Kitts and Nevis')
    self.assertEqual(merged['Region'][row], 'NORTH AMERICA')
    # Income is compared after truncation to an integer.
    self.assertEqual(int(merged['Income'][row]), 12659)
def test_merge_by_year_case3(self):
    # Case 3: merge for 1800 and check entry 176 (Venezuela).
    # Load the two input tables.
    country_table = pd.read_csv('countries.csv', sep=',')
    raw_income = pd.read_excel(
        'indicator gapminder gdp_per_capita_ppp.xlsx',
        sheetname='Data',
        index_col=0,
    )
    # The merge is given the transposed income table.
    income_by_year = raw_income.transpose()

    # Run the merge under test.
    merged = data_process_functions.merge_by_year(
        1800, country_table, income_by_year
    )

    row = 176
    self.assertEqual(merged['Country'][row], 'Venezuela')
    self.assertEqual(merged['Region'][row], 'SOUTH AMERICA')
    # Income is compared after truncation to an integer.
    self.assertEqual(int(merged['Income'][row]), 442)
def test_merge_by_year_case1(self):
    # Case 1: merge for 2003 and check entry 27 (Mauritius).
    # Load the two input tables; the income sheet is transposed before use.
    country_table = pd.read_csv('countries.csv', sep=',')
    income_by_year = pd.read_excel(
        'indicator gapminder gdp_per_capita_ppp.xlsx',
        sheetname='Data',
        index_col=0,
    ).transpose()

    # Run the merge under test.
    merged = data_process_functions.merge_by_year(
        2003, country_table, income_by_year
    )

    row = 27
    self.assertEqual(merged['Country'][row], 'Mauritius')
    self.assertEqual(merged['Region'][row], 'AFRICA')
    # Income is compared after truncation to an integer.
    self.assertEqual(int(merged['Income'][row]), 9564)
def generate_answers_hw9():
    """Generate the answers of assignment 9.

    Steps, in order:
      1. Read ``countries.csv`` and the ``Data`` sheet of the income
         workbook, transpose the income table and print its head.
      2. Repeatedly ask the user for a year and, when the year is present in
         the transposed table's index, display the income distribution for
         it.  Typing ``finish`` leaves the loop; empty or invalid input
         prints a warning and asks again.
      3. For every year from 2007 through 2012, merge the two tables and
         call ``plot_boxplots`` and ``plot_histograms`` on the result.

    Output uses the single-argument ``print(...)`` form, which prints the
    same text whether ``print`` is a statement or a function.
    """
    # Q1
    countries = pd.read_csv('countries.csv', sep=',')
    # Q2: load the xlsx data from the sheet 'Data'
    income = pd.read_excel(
        'indicator gapminder gdp_per_capita_ppp.xlsx',
        sheetname='Data',
        index_col=0,
    )
    # Q3: work on the transpose; its index is what years are checked against
    income_new = income.transpose()
    print("The head of the data set:")
    print(income_new.head())
    print("The data has been loaded successfully!\n")

    # The user interaction part
    while True:
        print("Please Enter a year from 1800 to 2012 to check the income distribution (for example: 2003), enter 'finish' to stop checking")
        print("(Hint:Close the plot window to continue)")
        year_input = raw_input()

        if year_input == "finish":
            # Quit the interactive part
            print("the check process is finished")
            break

        # Only validation and display can raise the input errors handled
        # below, so only they live inside the try block.
        try:
            if year_input == "":
                raise Empty_Input_Error
            # Accept exactly four digits with a non-zero leading digit.
            if not re.match(r'^[1-9][0-9][0-9][0-9]$', year_input):
                raise Invalid_Input_Error
            year_number = int(year_input)
            if year_number not in income_new.index:
                raise Invalid_Input_Error
            # Q4: display the distribution of income per person across all
            # countries for the given year
            data_process_functions.display_income_dist_by_year(year_number, income_new)
        except Empty_Input_Error:
            # The prompt asks for a year, so the warning must say so too.
            print("Warning: The input is empty! please re-enter the year")
        except Invalid_Input_Error:
            print("Warning: The input number is not valid! please re-enter the year")

    # Generate the plots for 2007 through 2012 (range end is exclusive)
    print("the program is generating graphs for the years 2007-2012...")
    for year_ind in range(2007, 2013):
        merged_data = data_process_functions.merge_by_year(year_ind, countries, income_new)  # Q5
        data_analysis_instance = data_analysis_tools.Data_Analysis_Tools(year_ind, merged_data)  # Q6
        data_analysis_instance.plot_boxplots()  # Q8
        data_analysis_instance.plot_histograms()  # Q8
    print("Congratulations! the results are saved successfully, thanks for trying ,bye")
def generate_answers():
    """Generate the answers for the assignment.

    Steps, in order:
      1. Read ``./countries.csv`` and the ``Data`` sheet of the income
         workbook, transpose the income table and print its head.
      2. Repeatedly ask the user for a year and, when the year is present in
         the transposed table's index, display the income distribution for
         it.  Typing ``finish`` leaves the loop.  Empty or invalid input
         prints a warning (instead of being ignored silently) and asks
         again.
      3. For every year from 2007 through 2012, merge the two tables and
         call ``plot_histograms`` and ``plot_boxplots`` on the result.

    Output uses the single-argument ``print(...)`` form, which prints the
    same text whether ``print`` is a statement or a function.
    """
    # question 1
    countries = pd.read_csv('./countries.csv', sep=',')
    # question 2
    income = pd.read_excel(
        './indicator gapminder gdp_per_capita_ppp.xlsx',
        sheetname='Data',
        index_col=0,
    )
    # question 3: work on the transpose; its index is what years are
    # checked against
    income2 = income.transpose()
    print("The head of the data set is:")
    print(income2.head())

    while True:
        print("Please Enter a year from 1800 to 2012 to check the income distribution (hint: enter 'finish' to stop displaying)")
        year_input = raw_input()

        if year_input == "finish":
            break

        # Only validation and display can raise the input errors handled
        # below, so only they live inside the try block.
        try:
            if year_input == "":
                raise Empty_Input_Error
            # Accept exactly four digits with a non-zero leading digit.
            if not re.match(r'^[1-9][0-9][0-9][0-9]$', year_input):
                raise Invalid_Input_Error
            year_number = int(year_input)
            if year_number not in income2.index:
                raise Invalid_Input_Error
            # question 4: display the distribution of income per person
            # across all countries for the given year
            data_process_functions.display_income_distribution(year_number, income2)
        except Empty_Input_Error:
            # Tell the user why the prompt is being repeated.
            print("Warning: The input is empty! Please re-enter the year")
        except Invalid_Input_Error:
            print("Warning: The input number is not valid! Please re-enter the year")

    # question 8: generate graphs for the years 2007 through 2012
    # (range end is exclusive)
    print("The graphs for 2007-2012 are generating...")
    for year_i in range(2007, 2013):
        merged_data = data_process_functions.merge_by_year(year_i, countries, income2)
        graph_data = data_analysis_tools.data_analysis(year_i, merged_data)
        graph_data.plot_histograms()
        graph_data.plot_boxplots()
    print("The graphs for 2007-2012 have been successfully generated. Please check the directory. Thanks!")