def __init__(self,**config): # 设置参数,必要参数 self.raw_variables = config.get('variables') self.variables = list(set(self.raw_variables.keys())) self.assist_variables = set(self.raw_variables.values()) self.assist_variables.discard(None) self.assist_variables = list(self.assist_variables) self.region = config.get('region') self.year = config.get('year') self.scale = config.get('scale') self.report_title = config.get('report_title') self.report_path = config.get('report_path') # 设置参数,非必要参数 if 'report_author' in config.keys(): self.report_author = config.get('report_author') else: self.report_author = 'Plutoese' # 区域数据 self.region_data = RegionData() # 查询数据 if (self.region is not None) and (self.year is not None): print(self.variables) self._data = self.region_data.find(region=self.region,year=self.year,variable=self.variables,scale=self.scale) self._assist_data = self.region_data.find(region=self.region,year=self.year,variable=self.assist_variables,scale=self.scale) elif self.region is not None: self._data = self.region_data.find(region=self.region,variable=self.variables,scale=self.scale) self._assist_data = self.region_data.find(region=self.region,variable=self.assist_variables,scale=self.scale) elif self.year is not None: self._data = self.region_data.find(year=self.year,variable=self.variables,scale=self.scale) self._assist_data = self.region_data.find(year=self.year,variable=self.assist_variables,scale=self.scale) else: self._data = self.region_data.find(variable=self.variables,scale=self.scale) self._assist_data = self.region_data.find(variable=self.assist_variables,scale=self.scale) self.data = self._data['pdata'] self.year = list(self.data.items) # 格式转换 for y in self.year: self.data[y][self.variables] = self.data[y][self.variables].astype(np.float64) self.assist_data = self._assist_data['pdata'] # 生成报告 self.report = Report(title=self.report_title,author=self.report_author)
class RegionReportGenerator(): '''类RegionReportGenerator是区域分析报告生成器 :param dict config: 参数配置 ''' # 构造函数 def __init__(self,**config): # 设置参数,必要参数 self.raw_variables = config.get('variables') self.variables = list(set(self.raw_variables.keys())) self.assist_variables = set(self.raw_variables.values()) self.assist_variables.discard(None) self.assist_variables = list(self.assist_variables) self.region = config.get('region') self.year = config.get('year') self.scale = config.get('scale') self.report_title = config.get('report_title') self.report_path = config.get('report_path') # 设置参数,非必要参数 if 'report_author' in config.keys(): self.report_author = config.get('report_author') else: self.report_author = 'Plutoese' # 区域数据 self.region_data = RegionData() # 查询数据 if (self.region is not None) and (self.year is not None): print(self.variables) self._data = self.region_data.find(region=self.region,year=self.year,variable=self.variables,scale=self.scale) self._assist_data = self.region_data.find(region=self.region,year=self.year,variable=self.assist_variables,scale=self.scale) elif self.region is not None: self._data = self.region_data.find(region=self.region,variable=self.variables,scale=self.scale) self._assist_data = self.region_data.find(region=self.region,variable=self.assist_variables,scale=self.scale) elif self.year is not None: self._data = self.region_data.find(year=self.year,variable=self.variables,scale=self.scale) self._assist_data = self.region_data.find(year=self.year,variable=self.assist_variables,scale=self.scale) else: self._data = self.region_data.find(variable=self.variables,scale=self.scale) self._assist_data = self.region_data.find(variable=self.assist_variables,scale=self.scale) self.data = self._data['pdata'] self.year = list(self.data.items) # 格式转换 for y in self.year: self.data[y][self.variables] = self.data[y][self.variables].astype(np.float64) self.assist_data = self._assist_data['pdata'] # 生成报告 self.report = Report(title=self.report_title,author=self.report_author) # 变量转换 def variable_transformation(self): generator_data = dict() self.generator_variables = set() for year in list(self.data.items): assist_data_year = self.assist_data[year] del assist_data_year['region'] total_data = pd.merge(self.data[year], assist_data_year, left_index=True, right_index=True, how='outer') csdexplorer = CrossSectionRegionDataExplorer(total_data) cross_section_frame = pd.DataFrame({'region':self.data[year]['region']}) for var in self.data[year].columns[1:]: variable_name = var dframe = pd.DataFrame({variable_name:self.data[year][variable_name]}) if self.raw_variables[var] is not None: variable_name = '|'.join([var,self.raw_variables[var]]) dframe = csdexplorer.per_variable(pop=self.raw_variables[var],var=[var]) if (np.min(dframe[variable_name]) > 0) and (np.max(dframe[variable_name]) < 1): dframe = dframe.applymap(lambda x: 100 * x) cross_section_frame = pd.merge(cross_section_frame, dframe, left_index=True, right_index=True, how='outer') continue if (np.min(dframe[variable_name]) > 0) and (np.max(dframe[variable_name]) < 100): cross_section_frame = pd.merge(cross_section_frame, dframe, left_index=True, right_index=True, how='outer') continue if np.min(dframe[variable_name]) > 0: dframe = dframe.applymap(np.log) dframe.columns = ['|'.join([variable_name,u'对数'])] #print(dframe.columns) cross_section_frame = pd.merge(cross_section_frame, dframe, left_index=True, right_index=True, how='outer') generator_data[year] = cross_section_frame self.generator_variables.update(list(cross_section_frame.columns)) self.generate_data = pd.Panel(generator_data) for y in self.year: self.generate_data[y][list(self.generate_data[y].columns)[1:]] = self.generate_data[y][list(self.generate_data[y].columns)[1:]].astype(np.float64) self.generator_variables = list(self.generator_variables) self.generator_variables.remove('region') # 分析的主接口 def analysis(self): #self.describe() #self.describe(data_source='generated') self.corr() #self.corr(data_source='generated') # 相关性分析 def corr(self,data_source='Raw',model='auto'): if re.match('^Raw$',data_source): variables = self.variables data = self.data else: variables = self.generator_variables data = self.generate_data for year in self.year: self.report.create_section(year) data_frame = data[year] cse = CrossSectionRegionDataExplorer(data_frame) corr = cse.corr() corr2 = corr.applymap(abs) corr_rank = corr2.rank(method='max',ascending=False) print(corr) print(corr.shape[0],len(variables)) print(corr_rank) top_number = min(len(variables)+1,11) for var in variables: self.report.create_subsection(var) #print(corr[var]) corr_data = {'variable':corr_rank.index,'rank':corr_rank[var],'corr':corr[var]} corr_data_frame = pd.DataFrame(corr_data,columns=['rank','variable','corr']) corr_data_frame = corr_data_frame.set_index('rank') corr_data_frame = corr_data_frame.sort_index() corr_data_frame = corr_data_frame.reindex(list(range(2,len(variables)+1))) corr_data_frame = corr_data_frame.set_index('variable') corr_data_frame = corr_data_frame.applymap(lambda x:'{0:.2f}'.format(x)) corr_data_frame = corr_data_frame.iloc[:top_number] table_data = self.dataframe_to_list(corr_data_frame) self.report.add_table(table_data['data'],table_data['nrow'],table_data['ncol']) for xvar in corr_data_frame.index: plt = cse.scatter(y=var,x=xvar,save=True) self.report.add_matplotlib_graph(plt,caption='图形') plt.close() print('-------------------------') print(corr_data_frame) print(self.dataframe_to_list(corr_data_frame)) print('-------------------------') # 描述性统计 def describe(self,data_source='Raw'): if re.match('^Raw$',data_source): variables = self.variables data = self.data else: variables = self.generator_variables data = self.generate_data for var in variables: # 生成章节 self.report.create_section(var) for year in self.year: # 生成分支章节 self.report.create_subsection(year) #data_frame = pd.DataFrame({var:data[year][var]}) data_frame = data[year][['region',var]] cse = CrossSectionRegionDataExplorer(data_frame) #print(cse.describe().applymap(lambda x:'{0:.2f}'.format(x))) tdata = cse.describe().applymap(lambda x:'{0:.2f}'.format(x)) tdata = self.dataframe_to_list(tdata) print(var,year) self.report.add_table(tdata['data'],tdata['nrow'],tdata['ncol']) # 生成pdf报告 def generate_pdf(self): self.report.flush() self.report.generate_pdf(self.report_path) # 把data.frame转化为list def dataframe_to_list(self,dframe): dict_data = dframe.to_dict('split') result = dict_data['data'] [result[i].insert(0,dict_data['index'][i]) for i in range(len(result))] title = [u'变量'] title.extend(dict_data['columns']) result.insert(0,title) nrow = len(result) ncol = len(result[0]) return {'data':result,'ncol':ncol,'nrow':nrow}