def calc_frame_txt(obj, calc_dict, calc_kind=None):
    '''Render ``{{ ... }}`` templates found in a shape's text frame, in place.

    The rendered text is always written into an existing run and the other
    affected runs are blanked, so the formatting of the first run is kept
    (copying fonts between runs directly was found to fail).

    Args:
        obj: Object exposing ``text_frame`` with ``paragraphs`` / ``text``;
            each paragraph exposes ``text`` and ``runs``.
        calc_dict: Variables handed to ``exec_template``.
        calc_kind: Rendering strategy.
            3 - per paragraph, locate the run span covering each
                ``{{ ... }}`` expression and render only that span;
            2 - render each paragraph's whole text as one template;
            any other value - render the whole frame text as one template.
            ``None`` selects 1 when the frame has no paragraphs, else 3.

    Raises:
        RuntimeError: rendering failed; the message is the template text and
            the original exception is chained as ``__cause__``.

    NOTE(review): ``env`` and ``exec_template`` are resolved from the
    enclosing/module scope; they are not defined in this block.
    '''

    def render(template):
        # Surface the offending template text instead of a bare engine error.
        try:
            return exec_template(env, template, calc_dict)
        except Exception as err:
            raise RuntimeError(template) from err

    def first_run(paragraph):
        # A paragraph may legitimately have no runs (e.g. an empty first
        # paragraph); create one rather than failing with IndexError.
        runs = paragraph.runs
        return runs[0] if runs else paragraph.add_run()

    text_frame = obj.text_frame
    if calc_kind is None:
        calc_kind = 1 if len(text_frame.paragraphs) < 1 else 3

    if calc_kind == 3:
        # The frame may mix several formats, so expressions can be split
        # across runs; find the (start, end) run span of each expression.
        for paragraph in text_frame.paragraphs:
            if '{{' not in paragraph.text:
                continue
            spans = []
            start, end, opened, closed = -1, -1, 0, 0
            for idx, run in enumerate(paragraph.runs):
                if '{{' in run.text:
                    opened += 1
                    if opened == 1:
                        start = idx
                if '}}' in run.text:
                    end = idx
                    closed += 1
                if start >= 0 and end >= 0 and opened == closed:
                    spans.append((start, end))
                    start, end, opened, closed = -1, -1, 0, 0
            for start, end in spans:
                if start > end:
                    # A stray '}}' preceded the opening '{{'; nothing to render.
                    continue
                runs = paragraph.runs
                template = ''.join(run.text for run in runs[start:end + 1])
                runs[start].text = render(template)
                for run in runs[start + 1:end + 1]:
                    run.text = ''
    elif calc_kind == 2:
        for paragraph in text_frame.paragraphs:
            if '{{' not in paragraph.text:
                continue
            result = render(paragraph.text)
            for run in paragraph.runs:
                run.text = ''
            first_run(paragraph).text = result
    else:
        expr = text_frame.text
        if '{{' in expr:
            result = render(expr)
            for paragraph in text_frame.paragraphs:
                for run in paragraph.runs:
                    run.text = ''
            first_run(text_frame.paragraphs[0]).text = result
def json_keyvalue_all(input_json, rootStack=None, ds_dict=None):
    '''Render every ``{{ ... }}`` string inside a JSON-like structure, in place.

    Dict values and list items that are template strings are replaced by the
    output of ``exec_template``. When a list stored under a dict key contains
    a row (a list) in which a template was rendered, that row is expanded:
    every cell is split on newlines and the columns are transposed into as
    many rows as the longest cell has lines (short cells are padded with
    ``''``). The expanded rows replace the template row and, as before, any
    rows that followed it are dropped.

    Args:
        input_json: dict, list or scalar to process (containers are mutated).
        rootStack: Stack of ``(container, key_or_index)`` pairs describing the
            path to ``input_json``. Callers normally omit it.
        ds_dict: Variables handed to ``exec_template``.

    Returns:
        True when ``input_json`` is a template string that was rendered, or a
        list containing one (recursively); False otherwise. Dicts always
        yield False.
    '''
    if rootStack is None:
        rootStack = []
    if ds_dict is None:
        ds_dict = {}

    def render(template):
        # Build the environment only when a template is actually present
        # instead of once per visited node.
        return exec_template(get_jinja2_Environment(), template, ds_dict)

    try:
        if isinstance(input_json, dict):
            for key in input_json.keys():
                key_value = input_json.get(key)
                if isinstance(key_value, dict):
                    # Recurse but keep going: later sibling keys must be
                    # processed as well.
                    rootStack.append((input_json, key))
                    json_keyvalue_all(key_value, rootStack, ds_dict)
                elif isinstance(key_value, list):
                    for idx, element in enumerate(key_value):
                        rootStack.append((key_value, idx))
                        rendered = json_keyvalue_all(element, rootStack,
                                                     ds_dict)
                        # Scalar elements are replaced inside key_value, so
                        # re-read the slot rather than trusting `element`.
                        row = key_value[idx]
                        if rendered and isinstance(row, list):
                            columns = [
                                cell.split('\n')
                                if isinstance(cell, str) else [cell]
                                for cell in row
                            ]
                            height = max(
                                (len(col) for col in columns), default=0)
                            input_json[key] = key_value[0:idx] + [[
                                col[i] if len(col) > i else ''
                                for col in columns
                            ] for i in range(height)]
                            break
                elif isinstance(key_value, str) and key_value.find("{{") >= 0:
                    print(str(key) + " = " + str(key_value))
                    input_json[key] = render(key_value)
            return False

        if isinstance(input_json, list):
            changed = False
            for idx, item in enumerate(input_json):
                rootStack.append((input_json, idx))
                if json_keyvalue_all(item, rootStack, ds_dict):
                    changed = True
            return changed

        if isinstance(input_json, str) and input_json.find("{{") >= 0:
            print(" 数组内: " + str(input_json))
            val = render(input_json)
            if rootStack:
                container, slot = rootStack[-1]
                if isinstance(container, (list, dict)):
                    container[slot] = val
            return True
        return False
    finally:
        # Each recursive call pops the frame its caller pushed. The outermost
        # call has no frame when started with an empty stack.
        if rootStack:
            rootStack.pop()
def loop_one_txt(one_part, t_ds_dict, idx=0):
    '''Render one text part, record the result and queue WeChat notifications.

    The part's ``txt`` is HTML; its plain text is extracted first. If that
    text starts with ``http`` it is handed to ``convert_html`` to produce an
    HTML page and the result is the public URL of that page; otherwise the
    text is rendered as a template with ``exec_template``.

    Args:
        one_part: Mapping with ``txt`` and ``name`` and an optional
            comma-separated ``wx_msg`` list of recipient ids.
        t_ds_dict: Variables handed to the template functions.
        idx: Index embedded in the generated page file name.

    NOTE(review): ``id``, ``upload_path``, ``tpl_results``, ``wx_queue``,
    ``convert_html`` and ``exec_template`` come from the enclosing/module
    scope. If no ``id`` is defined there, the file name embeds the repr of
    the builtin ``id`` - confirm.
    '''
    expr_html = lxml.html.fromstring(one_part['txt']).text_content()
    if expr_html.startswith("http"):
        # Day/hour/minute without zero padding, seconds zero padded. Built
        # from the fields because the '%#d' strftime flags only exist on
        # Windows; the output is identical there.
        now = datetime.datetime.now()
        last_append = f"{now.day}{now.hour}{now.minute}{now:%S}"
        txt_tpl = f"http://hnapp.e-chinalife.com/weixin2/RedirctHandler2.aspx/637A7394-C8FE-4A8B-9D3A-7E7ADA492CE4/a{id}_{last_append}_{idx}.html"
        # text_content() already yields plain text; a str has no .getText(),
        # so the previous call always raised AttributeError.
        # NOTE(review): assumes convert_html expects the extracted text as
        # its source argument - confirm against its definition.
        convert_html(
            f"{upload_path}/../../tmp/html/a{id}_{last_append}_{idx}.html",
            str(expr_html), t_ds_dict)
    else:
        txt_tpl = exec_template(None, expr_html, t_ds_dict)
    tpl_results.append({
        'name': one_part['name'],
        "result": txt_tpl.replace('\n', '\n<br>'),
        "img": 'https://gw.alipayobjects.com/zos/rmsportal/WdGqmHpayyMjiEhcKoVE.png',
    })
    message = txt_tpl
    # Tolerate a missing or None recipient list, and send the trimmed id so
    # "a, b" does not produce a recipient with a leading space.
    for wx_user in (one_part.get("wx_msg") or '').split(","):
        wx_user = wx_user.strip()
        if wx_user != '':
            wx_queue.put({
                'type': 'sendMessage',
                "wxid": wx_user,
                "content": message
            })
def calc_cell(sheet):
    '''Expand template cells of a worksheet and replicate adjacent formulas.

    Only the first 101 rows yielded by ``sheet.rows`` are examined. For each
    string cell containing ``{{``, the template is rendered; every output
    line becomes one sheet row starting at the template cell, and every
    whitespace-separated token becomes one cell to the right. Tokens shorter
    than 14 characters that ``is_number`` accepts are stored as floats.
    Formula cells that come later in the same row are then copied, with
    translated references, onto the extra rows the expansion produced.

    NOTE(review): ``env``, ``real_dict``, ``exec_template``, ``is_number``
    and ``Translator`` are resolved from the enclosing/module scope.
    '''
    for visited, sheet_row in enumerate(sheet.rows):
        if visited > 100:
            return
        # Number of sheet rows the last template in this row expanded into.
        need_lines = 0
        for cell in sheet_row:
            if cell.value is None:
                continue
            if cell.data_type == 's' and cell.value.find('{{') != -1:
                rendered = exec_template(env, cell.value, real_dict)
                first_row = cell.row
                first_col = cell.column
                target_row = first_row
                for line in rendered.split('\n'):
                    target_col = first_col
                    for token in line.split():
                        target = sheet.cell(row=target_row, column=target_col)
                        # Fewer than 14 characters: longer digit strings are
                        # treated as identifiers, not quantities.
                        if len(token) < 14 and is_number(token):
                            target.value = float(token)
                            target.data_type = 'n'
                        else:
                            target.value = token
                        target_col += 1
                    target_row += 1
                need_lines = target_row - first_row
            elif cell.data_type == 'f' and cell.value.startswith('='):
                for offset in range(1, need_lines):
                    coord = sheet.cell(row=cell.row + offset,
                                       column=cell.column).coordinate
                    sheet[coord] = Translator(
                        cell.value,
                        origin=cell.coordinate).translate_formula(coord)
def convert_file_for_txt(out_filename, template_file, ds_dict):
    '''Render a text template file and write the result to ``out_filename``.

    The template's encoding is detected with ``chardet`` (UTF-8 when nothing
    is detected) and reused for the output file. The output directory is
    created when missing.

    Args:
        out_filename: Path of the file to write.
        template_file: Path of the template to read.
        ds_dict: Variables handed to ``exec_template``.

    Returns:
        The rendered text.
    '''
    out_dir = os.path.dirname(os.path.realpath(out_filename))
    os.makedirs(out_dir, exist_ok=True)

    with open(template_file, 'rb') as f:
        data = f.read()
    encoding = chardet.detect(data)['encoding'] or 'utf-8'
    # A pure-ASCII template can still render non-ASCII data; UTF-8 is a
    # superset, so reading is unaffected and writing cannot fail to encode.
    if encoding.lower() == 'ascii':
        encoding = 'utf-8'

    with open(template_file, mode='r', encoding=encoding) as fr:
        res = fr.read()
    result = exec_template(None, res, ds_dict)

    # Open the output only after rendering succeeded: opening it earlier
    # truncated the template when both paths were the same file and left an
    # empty output behind when rendering failed.
    with open(out_filename, mode='w', encoding=encoding) as fw:
        fw.write(result)
    return result
def appendData_and_execLastSql(one_ds, ret, upload_path):
    '''Merge the configured extra data into a dataset, then run its final SQL.

    Steps, each recorded in ``one_ds['exec_stat']``:
      1. Pick the key column (configured, else the first object-dtype column
         with unique values, else the first column).
      2. Left-join every ``append`` source onto the dataset. A source is an
         ``.xlsx``/``.csv`` file under ``upload_path``, a backup named
         ``上次[:name]`` / ``备份[:name]``, or another dataset in ``ret``.
      3. Try to cast object columns after the key column to int, then float.
      4. Run the optional ``sql`` (through ``pandasql``) and the optional
         ``vis_sql_conf['expr']`` expression, then round to two decimals.

    Args:
        one_ds: Dataset configuration; updated in place (``exec_stat``,
            ``key_column``, ``after_append_columns``, ``last_columns``).
        ret: Mapping of dataset name to ``{'data': DataFrame, 'p': config}``;
            ``ret[one_ds['name']]['data']`` is replaced by the result.
        upload_path: Directory holding uploaded files for this report.

    Raises:
        Exception: the join key of an appended source is not unique.
    '''
    import io

    k = one_ds['name']
    v = ret[k]
    one_ds['exec_stat'] = "2:开始合并和执行最终sql"
    key_column = one_ds.get('key_column')
    if key_column is None:
        for key in v['data'].columns:
            if (str(v['data'][key].dtype) == 'object'
                    and len(v['data'][key].unique()) == len(v['data'])):
                key_column = key
                break
        if key_column is None and not v['data'].empty:
            key_column = v['data'].columns[0]
    one_ds['key_column'] = key_column

    for one in one_ds.get('append', list()):
        # Check for a missing/empty source before touching it; the status
        # line used to read one['from'] first and failed on such entries.
        source = one.get('from') or ''
        if source == '':
            continue
        one_ds['exec_stat'] = "3:开始合并" + source
        if source.find(".xlsx") > 0:
            data = pd.read_excel(os.path.join(upload_path, source))
            data[data.columns[0]] = data[data.columns[0]].astype(str)
            right_key_column = data.columns[0]
        elif source.find(".csv") > 0:
            data = pd_read_csv(os.path.join(upload_path, source))
            right_key_column = data.columns[0]
        elif source[0:2] in ['上次', '备份']:
            # "<kind>[:dataset]" - the dataset defaults to the current one.
            other = source.split(":")
            backup_name = other[1] if len(other) > 1 else k
            rptid = os.path.realpath(upload_path).split("\\")[-1]
            qushu_date = datetime.date.today() + datetime.timedelta(days=-1)
            if source[0:2] == '上次':
                bak_file = os.path.realpath(
                    os.path.join(upload_path + "../../../过往数据/",
                                 f"{rptid}_{backup_name}_上次"))
            else:
                bak_file = os.path.realpath(
                    os.path.join(
                        upload_path + "../../../过往数据/",
                        f"{rptid}_{backup_name}_{qushu_date.isoformat()}"))
            if os.path.exists(f"{bak_file}.json"):
                with open(f"{bak_file}.json", 'r') as f:
                    # Literal JSON strings are deprecated as read_json input;
                    # wrap the text in a file-like object.
                    data = pd.read_json(io.StringIO(f.read()))
            elif not os.path.exists(f"{bak_file}.csv"):
                data = pd.DataFrame(columns=ret[backup_name]['data'].columns)
            else:
                data = pd_read_csv(f"{bak_file}.csv")
            if data.empty:
                data = pd.DataFrame(columns=ret[backup_name]['data'].columns)
            right_key_column = ret[backup_name]['p']['key_column']
        elif ret.get(source):
            data = ret[source]['data']
            right_key_column = ret[source]['p']['key_column']
        else:
            continue

        if right_key_column not in data.columns:
            right_key_column = data.columns[0]
        if v['data'].empty:
            # Nothing to join onto: the appended data becomes the dataset.
            # NOTE(review): one_ds['key_column'] is not updated here, so the
            # numeric conversion below may not find the key - confirm intent.
            v['data'] = data
            key_column = right_key_column
            continue
        data = data[(data[right_key_column] != '')
                    & data[right_key_column].notnull()].reset_index(drop=True)
        data[right_key_column] = data[right_key_column].astype(str)
        if len(data[right_key_column].unique()) != len(data):
            raise Exception(
                f"数据集【{v['p']['name']}】 的合并数据集【{source}】的[{right_key_column}]列数据不唯一!"
            )
        v['data'] = v['data'].merge(data,
                                    how="left",
                                    left_on=key_column,
                                    right_on=right_key_column,
                                    suffixes=('', f"_{source}")).fillna(0)

    one_ds['exec_stat'] = "4:合并成功,开始数据转换"
    data = v['data']
    one_ds['after_append_columns'] = list(data.columns)
    # Columns after the key column are converted to numbers where possible.
    start_number = False
    for x in data.columns:
        if x == one_ds['key_column']:
            start_number = True
            continue
        if not start_number:
            continue
        if data[x].dtype.name == 'object':
            try:
                data[x] = data[x].astype(int)
            except Exception:
                try:
                    data[x] = data[x].astype(float)
                except Exception:
                    # Best effort: leave non-numeric columns untouched.
                    pass

    one_ds['exec_stat'] = "5:开始执行最终sql"
    sql = (one_ds.get('sql') or '').strip()
    if sql != "":
        exec_sql = exec_template(None, sql, [])
        data = pandasql.sqldf(
            exec_sql, {key: value['data'] for key, value in ret.items()})
    vis_conf = one_ds.get('vis_sql_conf')
    if vis_conf is not None and vis_conf.get('expr', '').strip() != '':
        # SECURITY: evaluates an expression taken from the dataset
        # configuration; it must never come from untrusted input.
        data = eval(k + vis_conf['expr'], {k: data})
    v['data'] = data.round(2)
    one_ds['last_columns'] = data.columns.values.tolist()
    one_ds['exec_stat'] = "9:完成sql执行"
def convert_file_for_pptx(out_filename, template_file, ds_dict):
    '''Render a pptx template into ``out_filename``.

    Processing order:
      1. Unzip the template and render every embedded ``.xlsx`` workbook.
      2. For each OLE object on a slide, export the rendered workbook to an
         image and convert it over the object's preview picture.
      3. Re-zip the package, then render the ``{{ ... }}`` templates in text
         frames and tables and refresh chart data from the embedded
         workbooks. Lines of the form ``name = expression`` in a slide's
         notes define variables for that slide and the following ones.
      4. Save the presentation and export it with ``ppt2png``.

    Args:
        out_filename: Path of the pptx to produce.
        template_file: Path of the pptx template.
        ds_dict: Variables handed to the template functions; ``_idx_`` is
            forwarded to ``ppt2png``.

    Raises:
        RuntimeError: a template or a notes formula failed to render.
    '''
    unzip_path = os.path.join(out_filename + 't\\pptx_tmp')
    if os.path.exists(unzip_path):
        shutil.rmtree(unzip_path)
    unzip_single(template_file, unzip_path)

    embeddings_path = os.path.join(unzip_path, "ppt\\embeddings")
    tmp_pd_dict = {}
    tmp_excel_active_sheet_dict = {}
    if os.path.exists(embeddings_path):
        for x in os.listdir(embeddings_path):
            if x.endswith('.xlsx'):
                workbook_path = os.path.join(embeddings_path, x)
                # Rendered in place: input and output are the same file.
                active_name = convert_file_for_xlsx(workbook_path,
                                                    workbook_path,
                                                    ds_dict,
                                                    outImage=False)
                tmp_excel_active_sheet_dict[x] = active_name
                tmp_pd_dict[x] = pd.read_excel(workbook_path)

    # Pair every embedded workbook with the preview picture shown for it.
    xlsx_emf_arr = []
    root_path = os.path.join(unzip_path, "ppt")
    for slide in os.listdir(f"{root_path}\\slides"):
        if not slide.endswith(".xml"):
            continue
        with open(f"{root_path}\\slides\\{slide}", 'rb') as fh:
            doc = lxml.etree.XML(fh.read())
        id_embed_dict = {}
        for one_oleObj in doc.xpath("//p:oleObj", namespaces=doc.nsmap):
            for one_blip in one_oleObj.xpath(".//a:blip",
                                             namespaces=doc.nsmap):
                rel_id = one_oleObj.attrib.get('{' + doc.nsmap['r'] + '}id')
                embed = one_blip.attrib.get('{' + doc.nsmap['r'] + '}embed')
                id_embed_dict[rel_id] = embed
        if len(id_embed_dict) > 0:
            with open(f"{root_path}\\slides\\_rels\\{slide}.rels",
                      'rb') as fh:
                rels = lxml.etree.XML(fh.read())
            for rel_id, embed in id_embed_dict.items():
                xlsx = rels.xpath(
                    f"//*[local-name() = 'Relationship'][@Id='{rel_id}'] "
                )[0].attrib['Target']
                emf = rels.xpath(
                    f"//*[local-name() = 'Relationship'][@Id='{embed}'] "
                )[0].attrib['Target']
                xlsx_emf_arr.append({"xlsx": xlsx, "emf": emf, "slide": slide})

    for one in xlsx_emf_arr:
        png_file = os.path.realpath(root_path + "/slides/" + one['xlsx'] +
                                    "1.png")
        emf_file = os.path.realpath(root_path + "/slides/" + one['emf'])
        excel2img.export_img(
            root_path + "/slides/" + one['xlsx'], png_file,
            tmp_excel_active_sheet_dict[one['xlsx'].split("/")[-1]])
        my_cmd = f'convert "{png_file}" "{emf_file}"'
        # Reading to EOF waits for the converter; the context manager closes
        # the pipe. NOTE(review): this is a shell command string built from
        # file paths - keep those paths trusted.
        with os.popen(my_cmd) as pipe:
            pipe.readlines()
        os.remove(png_file)

    zipDir(unzip_path, out_filename)
    shutil.rmtree(out_filename + "t")
    env = get_jinja2_Environment()
    ppt_file = Presentation(out_filename)
    # Example of a loop expression:
    #   #expr title_lines=1 loop_var=index,row
    #   dataset=a.sort_values(zhibiao,ascending=False)[:size]

    def render(template, variables):
        # Surface the offending template text and keep the original cause.
        try:
            return exec_template(env, template, variables)
        except Exception as err:
            raise RuntimeError(template) from err

    def first_run(paragraph):
        # Create a run when the paragraph has none instead of failing.
        runs = paragraph.runs
        return runs[0] if runs else paragraph.add_run()

    def calc_frame_txt(obj, calc_dict, calc_kind=None):
        # Render the templates of one text frame in place. The result goes
        # into an existing run and the other runs are blanked, which keeps
        # the formatting (copying fonts directly was found to fail).
        text_frame = obj.text_frame
        if calc_kind is None:
            calc_kind = 1 if len(text_frame.paragraphs) < 1 else 3
        if calc_kind == 3:
            # Expressions may be split across differently formatted runs;
            # find the (start, end) run span of each expression.
            for paragraph in text_frame.paragraphs:
                if '{{' not in paragraph.text:
                    continue
                spans = []
                start, end, opened, closed = -1, -1, 0, 0
                for idx, run in enumerate(paragraph.runs):
                    if '{{' in run.text:
                        opened += 1
                        if opened == 1:
                            start = idx
                    if '}}' in run.text:
                        end = idx
                        closed += 1
                    if start >= 0 and end >= 0 and opened == closed:
                        spans.append((start, end))
                        start, end, opened, closed = -1, -1, 0, 0
                for start, end in spans:
                    if start > end:
                        continue
                    runs = paragraph.runs
                    template = ''.join(
                        run.text for run in runs[start:end + 1])
                    runs[start].text = render(template, calc_dict)
                    for run in runs[start + 1:end + 1]:
                        run.text = ''
        elif calc_kind == 2:
            for paragraph in text_frame.paragraphs:
                if '{{' not in paragraph.text:
                    continue
                result = render(paragraph.text, calc_dict)
                for run in paragraph.runs:
                    run.text = ''
                first_run(paragraph).text = result
        else:
            expr = text_frame.text
            if '{{' in expr:
                # The error used to reference an unbound `paragraph` here.
                result = render(expr, calc_dict)
                for paragraph in text_frame.paragraphs:
                    for run in paragraph.runs:
                        run.text = ''
                first_run(text_frame.paragraphs[0]).text = result

    def handle_all_shapes(shapes, real_dict, tmp_pd_dict):
        # real_dict carries the variables defined so far down the recursion;
        # tmp_pd_dict holds the data of the embedded workbooks.
        for shape in shapes:
            if hasattr(shape, "shapes"):
                handle_all_shapes(shape.shapes, real_dict, tmp_pd_dict)
                continue
            # Not every shape object defines all three flags; treat a
            # missing flag as False instead of aborting the whole pass.
            if getattr(shape, "has_text_frame", False):
                calc_frame_txt(shape, real_dict)
            elif getattr(shape, "has_chart", False):
                key = shape.chart._workbook.xlsx_part.partname.split("/")[-1]
                chart_data = ChartData()
                columns = list(tmp_pd_dict[key].columns.values)
                chart_data.categories = tmp_pd_dict[key][columns[0]]
                for one in columns[1:]:
                    chart_data.add_series(one, tuple(tmp_pd_dict[key][one]))
                shape.chart.replace_data(chart_data)
            elif getattr(shape, "has_table", False):
                table_rows = shape.table.rows
                for current_row, row in enumerate(table_rows):
                    for current_col, cell in enumerate(row.cells):
                        if cell.text_frame.text.find('{{') < 0:
                            continue
                        result = render(cell.text_frame.text, real_dict)
                        for paragraph in cell.text_frame.paragraphs:
                            for run in paragraph.runs:
                                run.text = ''
                        # Spread the output over the table: one line per
                        # row, one whitespace-separated token per cell.
                        copy_row = current_row
                        for one_line in result.split('\n'):
                            copy_col = current_col
                            for one in one_line.split():
                                cur_row_cells = table_rows[copy_row].cells
                                if copy_col >= len(cur_row_cells):
                                    # Out of columns: drop the rest.
                                    break
                                first_run(cur_row_cells[copy_col].text_frame
                                          .paragraphs[0]).text = one
                                copy_col = copy_col + 1
                            copy_row = copy_row + 1
                            if copy_row >= len(table_rows):
                                # Out of rows: stop copying.
                                break

    try:
        real_dict = ds_dict.copy()
        for slide in ppt_file.slides:
            if slide.has_notes_slide:
                # Variable definitions in the notes; later slides override
                # earlier ones.
                notes_text = slide.notes_slide.notes_text_frame.text
                for one_line in notes_text.split("\n"):
                    # Split on the first '=' only so the expression itself
                    # may contain '=' (comparisons, keyword arguments).
                    name, sep, expr = one_line.partition("=")
                    if not sep:
                        continue
                    try:
                        if expr.strip().startswith("{{"):
                            result_lines = exec_template(env, expr, real_dict)
                        else:
                            result_lines = exec_template(
                                env, "{{" + expr + "}}", real_dict)
                        real_dict = real_dict.copy()
                        real_dict[name.strip()] = result_lines
                    except Exception as e:
                        raise RuntimeError("\n备注说明中的公式不正确:" +
                                           one_line) from e
            handle_all_shapes(slide.shapes, real_dict, tmp_pd_dict)
    finally:
        # Save whatever was rendered, also when a slide failed, then release
        # the presentation before it is exported.
        ppt_file.save(out_filename)
        del ppt_file
    ppt2png(out_filename, ds_dict.get("_idx_", ''))
def handle_all_shapes(shapes, real_dict, tmp_pd_dict):
    '''Render templates in every shape of a slide, recursing into groups.

    * Text frames are handed to ``calc_frame_txt``.
    * Charts get their data replaced by the matching embedded workbook: the
      first column supplies the categories, each other column one series.
    * Table cells containing ``{{`` are rendered and the output is spread
      over the table from that cell on: one output line per table row, one
      whitespace-separated token per cell. Output that does not fit into
      the existing rows/columns is dropped.

    Args:
        shapes: Iterable of shapes; an item exposing ``shapes`` is treated
            as a group and processed recursively.
        real_dict: Variables handed to the template functions; passed down
            explicitly so nested groups see the same definitions.
        tmp_pd_dict: Embedded workbook file name -> DataFrame, used for
            chart data.

    Raises:
        RuntimeError: a table cell template failed to render; the message is
            the template text and the original exception is chained.

    NOTE(review): ``env``, ``exec_template``, ``calc_frame_txt`` and
    ``ChartData`` are resolved from the enclosing/module scope.
    '''
    for shape in shapes:
        if hasattr(shape, "shapes"):
            handle_all_shapes(shape.shapes, real_dict, tmp_pd_dict)
            continue
        # Not every shape object defines all three flags; treat a missing
        # flag as False instead of aborting the whole pass. The former no-op
        # probes of has_table / shape_type were dropped for the same reason.
        if getattr(shape, "has_text_frame", False):
            calc_frame_txt(shape, real_dict)
        elif getattr(shape, "has_chart", False):
            key = shape.chart._workbook.xlsx_part.partname.split("/")[-1]
            chart_data = ChartData()
            columns = list(tmp_pd_dict[key].columns.values)
            chart_data.categories = tmp_pd_dict[key][columns[0]]
            for one in columns[1:]:
                chart_data.add_series(one, tuple(tmp_pd_dict[key][one]))
            shape.chart.replace_data(chart_data)
        elif getattr(shape, "has_table", False):
            table_rows = shape.table.rows
            for current_row, row in enumerate(table_rows):
                for current_col, cell in enumerate(row.cells):
                    template = cell.text_frame.text
                    if template.find('{{') < 0:
                        continue
                    try:
                        result = exec_template(env, template, real_dict)
                    except Exception as err:
                        raise RuntimeError(template) from err
                    # Blank the runs rather than replacing them so the
                    # formatting of the first run is kept.
                    for paragraph in cell.text_frame.paragraphs:
                        for run in paragraph.runs:
                            run.text = ''
                    copy_row = current_row
                    for one_line in result.split('\n'):
                        copy_col = current_col
                        for one in one_line.split():
                            cur_row_cells = table_rows[copy_row].cells
                            if copy_col >= len(cur_row_cells):
                                # Out of columns: drop the rest of the line.
                                break
                            first_paragraph = cur_row_cells[
                                copy_col].text_frame.paragraphs[0]
                            if len(first_paragraph.runs) == 0:
                                first_paragraph.add_run()
                            first_paragraph.runs[0].text = one
                            copy_col = copy_col + 1
                        copy_row = copy_row + 1
                        if copy_row >= len(table_rows):
                            # Out of rows: stop copying.
                            break