def getIndicatorData(matchId, subIndicatorList, area):
    """Fetch the name, frequency and time-series statistics of each leaf indicator.

    ``subIndicatorList`` is the raw text of the subIndicatorList field: a
    bracketed, comma-separated list in which every entry pairs one indicator id
    with one closed date interval, e.g. ``[1015:2000.12.31-2019.12.31]``.

    An indicator whose name cannot be looked up is skipped entirely (a note is
    written to macro_child_report_module_indicator_date for ``matchId``), so
    the seven returned lists always stay index-aligned.

    :param matchId: id of the macro_child_report_module_indicator_date row that
        receives a failure note.
    :param subIndicatorList: content of the subIndicatorList field (see above).
    :param area: value matched against macro_economic_child_data_raw.area.
    :return: ``(indexList, dataList, gapList, changeRateList, avgList,
        dateList, indexFreqList)`` with one element per resolved indicator.
    """
    indexList = []  # names of every indicator that was resolved
    dataList = []
    gapList = []
    changeRateList = []
    avgList = []
    dateList = []
    indexFreqList = []

    # Drop the first and last character (the enclosing brackets), then split
    # into one "<id>:<first date>-<last date>" entry per indicator.
    entries = subIndicatorList[1:len(subIndicatorList) - 1].split(",")
    for ind in entries:
        fields = ind.split(":")
        # Tolerate typos such as "[18177": peel stray punctuation off both
        # ends, one character at a time, in the original order.
        ind_id = fields[0]
        for junk in "[]-()}{,.;":
            ind_id = ind_id.strip(junk)
        ind_Duration = fields[1]
        # The first ten characters are the start date; the end date follows
        # the single separator character at index 10.
        firsttime = ind_Duration[:10].replace('.', '-')
        lasttime = ind_Duration[11:].replace('.', '-')

        try:
            sql = "select indexName,indexFreq from macro_economic_child_indicator where id = " + ind_id
            # Query once and read both columns before touching the output
            # lists, so a failure can never leave them with different lengths.
            meta = rs.SuccessSql(sql, isSelect=True)[0]
            index_name = meta["indexName"]
            index_freq = meta["indexFreq"]
        except Exception:
            sql0 = "update macro_child_report_module_indicator_date set " \
                   "note = '获取 macro_economic_indicator表 id=" + ind_id + " 指标名称失败,无该指标' where id = " + str(matchId)
            rs.SuccessSql(sql0, isSelect=False)
            continue
        indexList.append(index_name)
        indexFreqList.append(index_freq)

        # Reset per indicator: a failed data query must not silently reuse
        # the rows fetched for the previous indicator.
        indicatorData = []
        try:
            # The date interval is closed on both ends.
            if firsttime == lasttime:
                sql = "SELECT * FROM macro_economic_child_data_raw WHERE indicatorId = '%s' and" \
                      " dataDate='%s' and area='%s'" % (ind_id, firsttime, area)
            else:
                sql = "SELECT * FROM macro_economic_child_data_raw WHERE indicatorId = '%s' and " \
                      "dataDate>='%s' and dataDate<='%s' and area='%s' order by dataDate asc" % \
                      (ind_id, firsttime, lasttime, area)
            indicatorData = rs.SuccessSql(sql, isSelect=True)
        except Exception:
            # Record the failure in the database and carry on with no rows.
            sql0 = "update macro_child_report_module_indicator_date set " \
                   "note = '获取 macro_economic_child_data_raw indicatorId=" + ind_id + " 各项数据失败,数据缺失' where id = " + str(matchId)
            rs.SuccessSql(sql0, isSelect=False)

        # getList yields, in order: dates, raw values, gaps, change rates and
        # the average. Gaps and change rates are one period shorter.
        dateList0, dataList0, gapList0, changeRateList0, avg0 = getList(indicatorData)
        dateList.append(dateList0)
        dataList.append(dataList0)
        gapList.append(gapList0)
        changeRateList.append(changeRateList0)
        avgList.append(avg0)

    return indexList, dataList, gapList, changeRateList, avgList, dateList, indexFreqList
def getNumberWordExp(matchId, expression):
    """Look up the stored definition of a named expression.

    Reads the newest non-deleted row of macro_child_report_calculate_exp_date
    whose ``name`` equals ``expression`` for the given ``matchId``.

    :param matchId: value matched against the ``matchId`` column.
    :param expression: name of the expression to resolve.
    :return: the stored expression text, or ``None`` when that text contains
        its own name (a self-referencing definition). In that case a note is
        written to macro_child_report_module_indicator_test first.
    """
    print(expression)
    query = (
        "select * from macro_child_report_calculate_exp_date where "
        "name = '" + expression + "' and matchId = " + str(matchId)
        + " and isDelete = 0 order by id desc"
    )
    rows = rs.SuccessSql(query, isSelect=True)
    number_exp = rows[0]["expression"]

    # Normal case: the definition does not mention its own name.
    if number_exp.find(expression) == -1:
        return number_exp

    # Self-referencing definition: record it and return None so the caller
    # fails and gives up on replacing this sentence.
    note_sql = (
        "update macro_child_report_module_indicator_test set "
        "note = '" + str(matchId) + "式子" + expression + "套娃' where id = " + str(matchId)
    )
    rs.SuccessSql(note_sql, isSelect=False)
    return None
def remakeParagraphLevelContent(reportName, doc, date, area):
    """Append the stored sentences (and their charts) of one report to ``doc``.

    For every non-deleted macro_child_report_module_indicator row of the
    module/area, the newest macro_child_report_module_indicator_date row whose
    ``date`` is not after the report period is used. Its ``result`` text
    becomes a formatted paragraph, followed by the picture returned by
    ``getimage`` when there is one. Indicators without any matching dated row
    are skipped instead of aborting the whole report.

    :param reportName: matched against the ``moduleName`` column.
    :param doc: document object that receives paragraphs and pictures.
    :param date: report period as text; a four-character value is treated as
        a year, anything else gets ``-31`` appended.
    :param area: area value used in both queries (must be a ``str``).
    :return: ``(text, doc)`` where ``text`` is the concatenation of all
        paragraphs, or ``""`` when nothing was produced.
    """
    sql = "select * from macro_child_report_module_indicator where moduleName = " \
          "'" + str(reportName) + "' and isDelete = 0 and area='" + area + "'"
    result = rs.SuccessSql(sql, isSelect=True)

    # Upper bound used in the ``date<=`` filter below.
    if len(date) == 4:
        enddate = str(date) + '-12-31'
    else:
        enddate = str(date) + '-31'

    module_content = ""
    for row in result:
        print(row)
        indicatorId = row['indicatorId']
        sql = "select * from macro_child_report_module_indicator_date where " \
              "indicatorId = '%s' and date<='%s' and area='%s' and isDelete = 0 order by date desc" \
              % (indicatorId, enddate, area)
        dated_rows = rs.SuccessSql(sql, isSelect=True)
        if not dated_rows:
            # No sentence generated for this indicator up to enddate.
            continue
        res = dated_rows[0]

        if res['result'] is not None and res['result'] != "":
            paragraph = res['result'].lstrip(' ').replace('\n', '')
        else:
            paragraph = ""
        module_content = module_content + paragraph
        if paragraph == "":
            continue

        # Text paragraph and its formatting.
        paragraph1 = doc.add_paragraph()
        ph_format = paragraph1.paragraph_format
        ph_format.space_before = Pt(10)        # spacing before the paragraph
        ph_format.space_after = Pt(30)         # spacing after the paragraph
        ph_format.line_spacing = Pt(25)        # line spacing
        ph_format.first_line_indent = 406400   # indent the first line by two characters
        run = paragraph1.add_run(paragraph)
        chg_font(run, fontname='微软雅黑', size=Pt(12))

        # Optional picture; getimage returns "" when none is available.
        img_path = getimage(res['id'])
        if img_path != "":
            paragraph2 = doc.add_paragraph()
            run2 = paragraph2.add_run()
            inline_shape = run2.add_picture(img_path)
            scale = inline_shape.height / inline_shape.width  # height/width ratio
            inline_shape.width = Cm(16)
            inline_shape.height = int(scale * inline_shape.width)  # must be an int
            paragraph2.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    if module_content.strip('\n') != "":
        return module_content, doc
    return "", doc  # this section has no content
def module15(self):
    """Build the five-period review template and its expression definitions.

    Reads ``self.word``, ``self.number``, ``self.result``, ``self.datarange``
    and ``self.area``. The returned text contains bracketed placeholders
    (``[timeN]``, ``[a_dataN]``, ``[wordK]``, ``[numberK]``, ``[a_max]``,
    ``[a_min]``) that are resolved elsewhere. The expression strings appended
    to the ``number``/``word`` lists define ``number1..4`` and ``word1..2``.

    A missing unit (``None`` or the text ``'None'``) is normalised to ``''``
    and written back into ``self.result``. The unit is always converted with
    ``str()`` so a non-string unit no longer raises ``TypeError``.

    :return: ``(text, word, number)``; ``word`` and ``number`` are new lists,
        the lists held on ``self`` are not modified.
    """
    word = self.word
    number = self.number
    result = self.result
    datarange = self.datarange
    area = self.area

    # Normalise the unit.
    if result['dataUnit'] is None or result['dataUnit'] == 'None':
        result['dataUnit'] = ''
    unit = str(result['dataUnit'])

    # Resolve the area: a six-character value is looked up as an area code,
    # any other value except '-1' is used verbatim as the area name.
    if area != '-1':
        if len(area) == 6:
            sql = "select area_name from area where code = " + str(area)
            area = rs.SuccessSql(sql, isSelect=True)[0]['area_name']
        indexName = area + '的' + result['indexName']
    else:
        indexName = result['indexName']
    name = str(indexName)

    oldest = datarange - 4  # the earliest of the five listed periods
    number1 = "[4:a_data%s-a_data%s:1]" % (datarange, oldest)
    number2 = "[4:a_data%s-a_data%s:2]" % (datarange, oldest)
    number3 = "[6:number1/a_data%s:2]" % (oldest,)
    number4 = "[4:a_data%s-a_data1:2]" % (datarange,)
    word1 = "[gapDesc:1:number1]"
    word2 = "[gapDesc:1:number4]"
    number = number + [number1, number2, number3, number4]
    word = word + [word1, word2]

    # Latest period first, then the four preceding periods.
    parts = ['截止于[time{n}],{name}达到[a_data{n}]{unit};'.format(
        n=datarange, name=name, unit=unit)]
    for back in range(1, 5):
        closing = '。' if back == 4 else ';'
        parts.append('[time{n}],该指标的数据达到[a_data{n}]{unit}{end}'.format(
            n=datarange - back, unit=unit, end=closing))
    parts.append(
        '由计算数据可以得出,最近五年[word1]了[number2]{unit},变化率同比[word1][number3]。'
        '我们可以根据历史数据看出,时间最久远一期的数据为[time1],具体统计数据为[a_data1]{unit}。'
        # The original template emitted a doubled comma after "来分析".
        '同时,总整体的角度来分析,在[time1-time{n}]期间,{name}最大值曾达到[a_max]{unit}'
        ',最小值曾达到[a_min]{unit},总体呈现为[word2]趋势。'.format(
            n=datarange, name=name, unit=unit)
    )
    text = ''.join(parts)
    return text, word, number
def module1(self):
    """Build the one-sentence "latest value" template.

    Reads ``self.word``, ``self.number``, ``self.result``, ``self.datarange``
    and ``self.area``. A missing unit (``None`` or the text ``'None'``) is
    replaced by ``''`` inside ``self.result``. The ``[timeN]`` and
    ``[a_dataN]`` placeholders in the text are resolved elsewhere.

    :return: ``(text, word, number)`` with ``word`` and ``number`` passed
        through unchanged.
    """
    word, number = self.word, self.number
    result = self.result
    datarange = self.datarange
    area = self.area

    # Normalise the unit.
    if result['dataUnit'] in (None, 'None'):
        result['dataUnit'] = ''

    # Resolve the area: a six-character value is looked up as an area code,
    # any other value except '-1' is used verbatim as the area name.
    if area == '-1':
        indexName = result['indexName']
    else:
        if len(area) == 6:
            lookup = "select area_name from area where code = " + str(area)
            area = rs.SuccessSql(lookup, isSelect=True)[0]['area_name']
        indexName = area + '的' + result['indexName']

    print(datarange)
    text = '据最新统计数据显示,截至[time{n}],{name}的数据达到了[a_data{n}]{unit}。'.format(
        n=str(datarange), name=str(indexName), unit=str(result['dataUnit']))
    return text, word, number
def module10(self):
    """Build the long multi-paragraph trend template and its expressions.

    Reads ``self.word``, ``self.number``, ``self.result``, ``self.datarange``
    and ``self.area``. A missing unit (``None`` or the text ``'None'``) is
    replaced by ``''`` inside ``self.result``. The text is made of four
    consecutive parts and contains bracketed placeholders that are resolved
    elsewhere. The strings appended to ``number``/``word`` define
    ``number1..6`` and ``word1..4``.

    :return: ``(text, word, number)``; ``word`` and ``number`` are new lists.
    """
    word = self.word
    number = self.number
    result = self.result
    datarange = self.datarange
    area = self.area

    # Normalise the unit.
    if result['dataUnit'] in (None, 'None'):
        result['dataUnit'] = ''

    # Resolve the area: a six-character value is looked up as an area code,
    # any other value except '-1' is used verbatim as the area name.
    if area == '-1':
        indexName = result['indexName']
    else:
        if len(area) == 6:
            lookup = "select area_name from area where code = " + str(area)
            area = rs.SuccessSql(lookup, isSelect=True)[0]['area_name']
        indexName = area + '的' + result['indexName']

    prev = datarange - 1
    prev2 = datarange - 2
    extra_numbers = [
        "[3:a_gap{}:2]".format(prev),
        "[3:a_changeRate{}:2]".format(prev),
        "[4:a_changeRate{}-a_changeRate{}:1]".format(prev, prev2),
        "[4:a_changeRate{}-a_changeRate{}:3]".format(prev, prev2),
        "[4: a_data{}-a_data1:1]".format(datarange),
        "[4: a_data{}-a_data1: 2]".format(datarange),
    ]
    extra_words = [
        "[gapDesc:1:a_gap{}]".format(prev),
        "[gapLevelDesc:1:a_changeRate{}]".format(prev),
        "[rateChangeDesc:1:number3]",
        "[gapDesc:1: number5]",
    ]
    number = number + extra_numbers
    word = word + extra_words

    fields = {
        'name': str(indexName),
        'unit': str(result['dataUnit']),
        'n': str(datarange),
        'p': str(prev),
    }
    # Part 1: opening remark.
    text1 = '最近这几年来,人们对{name}的关心程度呈现一定程度的增长趋势。'.format(**fields)
    # Part 2: latest period compared with the one before it.
    text2 = (
        '具体来说,截至[time{n}],{name}的值为[a_data{n}]{unit},'
        '该指标在[time{p}]同期的值为[a_data{p}]{unit}。'
        '与[time{p}]同期相比[word1]了[number1]{unit},'
        '同比[word1][number2],[word1]规模较为[word2],变化率较上一年度[word3][number4]个百分点。'
    ).format(**fields)
    # Part 3: first period compared with the latest period.
    text3 = (
        '根据[time1-time{n}]中{name}的统计数据,'
        '从变化率的角度来看,我们可以从大体上看出,自从[time1]以来,{name}经历了一定程度的[word4],'
        '[time{n}]相比于[time1],[word4]了[number6]{unit}。'
        '具体来说,从在最开始的[time1]的数据为[a_data1]{unit},'
        '最后在[time{n}]年末达到了[a_data{n}]{unit}。'
    ).format(**fields)
    # Part 4: average, maximum and minimum over the whole range.
    text4 = (
        '从宏观的数据大体来看,[time1-time{n}]期间,{name}平均值为[a_avg]{unit}。'
        '从数据的峰值和谷值我们可以具体看出,在这几年中,我国{name}'
        '最大值曾达到[a_max]{unit},最小值曾达到[a_min]{unit}。'
    ).format(**fields)

    text = text1 + text2 + text3 + text4
    return text, word, number
def remakeSentence(senId):
    """Regenerate one stored sentence and persist the outcome.

    Loads the macro_child_report_module_indicator_date row with id ``senId``,
    passes it to ``remakeText`` and then either stores the produced text in
    the ``result`` column or, when the text still contains ``$$``, records
    the note '数据缺失' (data missing) instead.

    :param senId: id of the row to regenerate.
    :return: ``""`` when ``remakeText`` raises (the traceback is printed);
        otherwise ``None`` after the row has been updated.
    """
    sql = "select * from macro_child_report_module_indicator_date where id = " + str(senId)
    module = rs.SuccessSql(sql, isSelect=True)[0]

    try:
        resultText = remakeText(module)
        print(resultText)
    except Exception:
        traceback.print_exc()
        return ""

    if "$$" in resultText:
        # Unresolved marker left in the text: flag the row instead of storing it.
        update_sql = "update macro_child_report_module_indicator_date set note =" + "'数据缺失'" \
                     + " where id = " + str(module["id"])
    else:
        # The text is embedded in a single-quoted SQL literal. Double any
        # single quote so a sentence containing one cannot break the statement.
        quoted_text = resultText.replace("'", "''")
        update_sql = "update macro_child_report_module_indicator_date set result = '" \
                     + quoted_text + "' where id = " + str(module["id"])
    print(update_sql)
    rs.SuccessSql(update_sql, isSelect=False)
def getimage(sentence_id):
    """Write the stored chart of a sentence to disk and return its path.

    Takes the first macro_child_report_graph_date row whose ``moduleId`` is
    ``sentence_id`` and writes its ``image`` value to
    ``./图片/<sentence_id>/<row id>.jpg``, creating the directory if needed.

    :param sentence_id: id of the sentence whose picture is wanted.
    :return: path of the written file, or ``""`` on any failure (the
        traceback is printed). Callers treat ``""`` as "no picture".
    """
    try:
        sql = "select * from macro_child_report_graph_date where moduleId = " + str(sentence_id)
        result = rs.SuccessSql(sql)[0]

        img_dir_path = "./图片/" + str(sentence_id)
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(img_dir_path, exist_ok=True)

        img_path = os.path.join(img_dir_path, "%s.jpg" % result["id"])
        # The context manager closes (and flushes) the file before the path is
        # handed to the caller, which reads the picture back immediately.
        with open(img_path, 'wb') as f:
            f.write(result["image"])
        return img_path
    except Exception:
        import traceback
        traceback.print_exc()
        return ""
def remakeParagraphLevel2Content_2(title, doc):
    """Append the previously generated sentences of a module to ``doc``.

    Reads every non-deleted macro_child_report_module_indicator row whose
    ``moduleName`` equals ``title`` and reuses its stored ``result`` text
    (no regeneration). Each non-empty text becomes a formatted paragraph,
    followed by the picture returned by ``getimage`` when there is one.

    :param title: matched against the ``moduleName`` column.
    :param doc: document object that receives paragraphs and pictures.
    :return: ``(text, doc)`` where ``text`` is the concatenation of all
        paragraphs, or ``""`` when nothing was produced.
    """
    sql = "select * from macro_child_report_module_indicator where moduleName = '" + \
          str(title) + "' and isDelete = 0"
    result = rs.SuccessSql(sql, isSelect=True)

    module_content = ""
    for res in result:
        if res['result'] is not None and res['result'] != "":
            # Strip leading spaces and drop line breaks. The previous
            # ``.split(' ').replace('/n', '')`` called ``replace`` on a list
            # and raised AttributeError for every non-empty sentence.
            paragraph = res['result'].lstrip(' ').replace('\n', '')
        else:
            paragraph = ""
        module_content = module_content + paragraph
        if paragraph == "":
            continue

        # Text paragraph and its formatting (no first-line indent here).
        paragraph1 = doc.add_paragraph()
        ph_format = paragraph1.paragraph_format
        ph_format.space_before = Pt(10)   # spacing before the paragraph
        ph_format.space_after = Pt(12)    # spacing after the paragraph
        ph_format.line_spacing = Pt(25)   # line spacing
        run = paragraph1.add_run(paragraph)
        chg_font(run, fontname='微软雅黑', size=Pt(12))

        # Optional picture; getimage returns "" when none is available.
        img_path = getimage(res['id'])
        if img_path != "":
            paragraph2 = doc.add_paragraph()
            run2 = paragraph2.add_run()
            inline_shape = run2.add_picture(img_path)
            scale = inline_shape.height / inline_shape.width  # height/width ratio
            inline_shape.width = Cm(16)
            inline_shape.height = int(scale * inline_shape.width)  # must be an int
            paragraph2.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    if module_content.strip('\n') != "":
        return module_content, doc
    return "", doc  # this section has no content
def module2(self):
    """Build the three-period trend template and its expression definitions.

    Reads ``self.word``, ``self.number``, ``self.result`` (keys ``dataUnit``,
    ``indexName``, ``moduleName``), ``self.datarange`` and ``self.area``.
    The text contains bracketed placeholders that are resolved elsewhere.
    The strings appended to ``number``/``word`` define ``number1..2`` and
    ``word1..2``.

    A missing unit (``None`` or the text ``'None'``) is normalised to ``''``
    and written back into ``self.result``. The unit is always converted with
    ``str()`` so a non-string unit no longer raises ``TypeError``.

    :return: ``(text, word, number)``; ``word`` and ``number`` are new lists,
        the lists held on ``self`` are not modified.
    """
    word = self.word
    number = self.number
    result = self.result
    datarange = self.datarange
    area = self.area

    # Normalise the unit.
    if result['dataUnit'] is None or result['dataUnit'] == 'None':
        result['dataUnit'] = ''
    unit = str(result['dataUnit'])

    # Resolve the area: a six-character value is looked up as an area code,
    # any other value except '-1' is used verbatim as the area name.
    if area != '-1':
        if len(area) == 6:
            sql = "select area_name from area where code = " + str(area)
            area = rs.SuccessSql(sql, isSelect=True)[0]['area_name']
        indexName = area + '的' + result['indexName']
    else:
        indexName = result['indexName']

    number1 = "[4: a_data%s-a_data%s: 1]" % (datarange, datarange - 2)
    number2 = "[6:number1/a_data%s:2]" % (datarange - 2,)
    word1 = "[gapDesc:1:number1]"
    word2 = "[gapLevelDesc:1:number2]"
    number = number + [number1, number2]
    word = word + [word1, word2]

    text = (
        '{name}是{module}所需要考察的一个重要指标,'
        '通常对此产生不可忽略的作用。'
        '由最新更新的数据显示,最近三年{name}的数据呈现[word1]趋势。'
        '由具体数据来看,[time{n0}]该指标的数据为[a_data{n0}]{unit},'
        '[time{n1}]该指标的数据为[a_data{n1}]{unit},'
        '[time{n2}]该指标的数据为[a_data{n2}]{unit}。这三年的变化率为[number2],'
        '相对来说变化速率较为[word2]。同时,我们可以从这三年的统计数据得出,该指标的平均值为[a_avg]{unit}。'
    ).format(
        name=str(indexName),
        module=str(result['moduleName']),
        unit=unit,
        n0=datarange,
        n1=datarange - 1,
        n2=datarange - 2,
    )
    return text, word, number
create_charts.create_base_pie(title, data_dic) else: create_charts.create_base_table(title, x_list, data_dic) if __name__ == '__main__': #293585 #7291√ for i in range(1, 116): data_list = [] x_list = [] data_dic = {} sql = "SELECT * FROM macro_child_report_module_indicator_date WHERE id =%s" % ( i) try: result = rs.SuccessSql(sql, isSelect=True)[0] # 返回查询得到的文本和其它内容 并保存为元组 except: print("notfound: module indicatorId" + str(i)) continue if (result['result'] == ""): continue # if(result['area']!='230281'): # continue subIndicatorList = result['subIndicatorList'] enddate = result['date'] datarange = result['datarange'] area = result['area'] subIndicatorList = subIndicatorList.strip('[]') ind_id = subIndicatorList.split(":")[0]
import runSql as rs

# Walk a fixed id range of macro_economic_child_map and collect, for every
# usable row, the leaf indicator's attributes plus its time frequency.
for i in range(94968, 351043):
    # Start from the map table to find every leaf indicator, i.e. all the
    # indicators we need. Indicators from data sources 1-4 need both
    # indicatorId and area to fetch all of their data.
    sql = "SELECT * FROM macro_economic_child_map WHERE id = '%d'" % i
    try:
        result = rs.SuccessSql(sql, isSelect=True)[0]
    except Exception:
        # The id was previously passed as a second print() argument, so the
        # %d placeholder was printed literally instead of being filled in.
        print("notfound: childindicatorId = '%d'" % i)
        continue

    # NOTE(review): several of these values are not used in the visible part
    # of the script; they are kept because the loop body appears to continue
    # beyond this fragment.
    indicatorId = result['childindicatorId']
    childindexName = result['childindexName']
    childlevel = result['childlevel']
    dataSource = result['dataSource']
    area = result['area']
    dataduration = result['dataduration']
    if dataduration is None:
        continue

    # Fetch the leaf indicator's time frequency and parent node (the eventual
    # top-level heading of the text) from macro_economic_child_indicator.
    sql = "SELECT parentId,indexFreq FROM macro_economic_child_indicator WHERE id = " + str(
        indicatorId)
    try:
        result = rs.SuccessSql(sql, isSelect=True)[0]
    except Exception:
        print("not found parentid: indicatorId = " + str(indicatorId))
        continue
    else:
        indexFreq = result['indexFreq']
def _report_period_label(indexFreq, date):
    """Return ``(reportType, timename)`` for a report frequency and date text."""
    if indexFreq == '年':
        return '年度分析报告', date + '年'
    if indexFreq not in ('季', '月'):
        # Previously this case left reportType/timename unbound and surfaced
        # later as a NameError.
        raise ValueError("unsupported report frequency: %r" % (indexFreq,))

    pieces = date.split('-')
    year, month = pieces[0], pieces[1]
    if indexFreq == '季':
        report_type = '季度分析报告'
        labels = {3: '年第一季度', 6: '年第二季度', 9: '年第三季度', 12: '年第四季度'}
    else:
        report_type = '月度分析报告'
        ordinals = ('一', '二', '三', '四', '五', '六', '七', '八', '九', '十', '十一', '十二')
        labels = dict((idx, '年' + name + '月份') for idx, name in enumerate(ordinals, 1))

    # The month is accepted with or without a leading zero ("3" or "03").
    for month_number, label in labels.items():
        if month in (str(month_number), '%02d' % month_number):
            return report_type, year + label
    return report_type, ''


def _to_absolute_report_path(base_dir, relative_path):
    """Join the working directory with a ``./``-relative path, using ``/`` only."""
    return (base_dir + relative_path).replace('./', '/').replace('\\', '/')


def remakeOneReport(reportId):
    """Generate the Word and PDF files of one report.

    Steps: load the macro_child_report_category_date row, derive title and
    abstract, build the cover page, append the body via
    ``remakeParagraphLevelContent``, append the fixed last page, add page
    marks, delete the intermediate files and convert the result to PDF.

    ``reportName`` is expected to look like ``<name>_<frequency>`` where the
    frequency is one of '年', '季', '月'.

    :param reportId: id of the macro_child_report_category_date row.
    :return: ``(title, word_path, pdf_path, date, abstract)``.
    :raises ValueError: when the frequency part of ``reportName`` is not one
        of the three supported values.
    """
    print("[", str(reportId), "START ]")
    sql = "select * from macro_child_report_category_date where id = " + str(reportId)
    results = rs.SuccessSql(sql, isSelect=True)[0]
    reportName = results['reportName']
    date = results['date']
    area = results['area']

    # Title and abstract -----------------------------------------------------
    name_parts = reportName.split('_')
    BigreportName = name_parts[0]
    indexFreq = name_parts[1]

    # A six-character area is looked up as an area code, '-1' means "no
    # area", anything else is used verbatim as the area name.
    if area != '-1':
        if len(area) == 6:
            sql = "select area_name from area where code = " + str(area)
            areaname = rs.SuccessSql(sql, isSelect=True)[0]['area_name']
        else:
            areaname = area
    else:
        areaname = ''

    reportType, timename = _report_period_label(indexFreq, date)

    if area == '-1':
        title = timename + '关于' + BigreportName + '的数据分析报告'
        abstract = "截至" + date + "年末,主要针对" + BigreportName \
                   + "指标的数据进行分析和处理,生成相应的" + reportType
    else:
        # NOTE: lstrip removes leading *characters* from the given set, not a
        # prefix; kept as is because names such as "各省市…" rely on it.
        BigreportName = BigreportName.lstrip('各省').lstrip('各市')
        title = timename + areaname + '关于' + BigreportName + '的数据分析报告'
        abstract = "截至" + date + "年末,主要针对" + areaname + "的" + BigreportName \
                   + "指标的数据进行分析和处理,生成相应的" + reportType
    print(abstract, title)

    # Cover page -------------------------------------------------------------
    print("制作封面...")
    path = os.getcwd()
    coverPage_filepath, doc = getCoverPage(
        path + "/报告/", path + "/报告/" + str(reportId), title, "", "云报告工作室")
    doc.add_page_break()

    # Body ---------------------------------------------------------------------
    print("添加正文...")
    _, doc = remakeParagraphLevelContent(reportName, doc, date, area)
    doc.add_page_break()

    # Last page ----------------------------------------------------------------
    print("添加尾页...")
    composer = Composer(doc)
    lastpage_filepath = "./报告/lastpage_zyb_纯生成报告用.docx"
    composer.append(Document(lastpage_filepath))
    report_dir = "./报告/" + str(reportId) + "/"
    output_all_filepath = report_dir + title + "_封面_正文_尾页.docx"  # intermediate file
    composer.save(output_all_filepath)

    # Page marks; the Word file is complete after this step -------------------
    print("添加页码...")
    output_filepath = report_dir + title + ".docx"
    addPageMark(output_all_filepath, output_filepath)
    print("word文件生成完毕,删去中间文件...")
    os.remove(output_all_filepath)   # intermediate file
    os.remove(coverPage_filepath)    # cover page Word file

    # PDF ----------------------------------------------------------------------
    print("word转PDF...")
    pdf_file = output_filepath.replace(".docx", ".pdf")
    output_filepath = _to_absolute_report_path(path, output_filepath)
    pdf_file = _to_absolute_report_path(path, pdf_file)
    print(output_filepath)
    print(pdf_file)
    doc2PDF(output_filepath, pdf_file)

    print("[", str(reportId), "FINISHED ]")
    return title, output_filepath, pdf_file, date, abstract