Пример #1
0
 def calc_frame_txt(obj, calc_dict, calc_kind=None):
     """Render ``{{ ... }}`` template expressions inside a shape's text frame.

     The rendered text is always written back into existing runs (the other
     runs involved are blanked) so that the run formatting is kept.

     Args:
         obj: Object exposing ``text_frame`` with ``paragraphs``; each
             paragraph exposes ``text`` and ``runs``.
         calc_dict: Variables handed to ``exec_template``.
         calc_kind: Rendering strategy. ``3`` renders each expression in the
             runs it spans, ``2`` renders one paragraph at a time, any other
             value renders the whole frame text at once. When ``None`` the
             strategy is ``1`` if the frame has no paragraphs, else ``3``.

     Raises:
         RuntimeError: If ``exec_template`` fails; the message is the template
             text that could not be rendered and the original error is chained.
     """
     if calc_kind is None:
         calc_kind = 1 if len(obj.text_frame.paragraphs) < 1 else 3
     if calc_kind == 3:
         # The frame may mix differently formatted runs, so an expression can
         # be split over several runs; locate the run span of each expression.
         for paragraph in obj.text_frame.paragraphs:
             if '{{' not in paragraph.text:
                 continue
             exp_list = []
             start, end, s_num, e_num = -1, -1, 0, 0
             for idx, run in enumerate(paragraph.runs):
                 if '{{' in run.text:
                     s_num += 1
                     if s_num == 1:
                         start = idx
                 if '}}' in run.text:
                     end = idx
                     e_num += 1
                 # Opening and closing markers are balanced: span complete.
                 if start >= 0 and end >= 0 and s_num == e_num:
                     exp_list.append((start, end))
                     start, end, s_num, e_num = -1, -1, 0, 0
             for start, end in exp_list:
                 if not (start >= 0 and end >= 0 and start <= end):
                     continue
                 text = ''.join(
                     [x.text for x in paragraph.runs[start:end + 1]])
                 try:
                     result = exec_template(env, text, calc_dict)
                 except Exception as exc:
                     raise RuntimeError(text) from exc
                 # The first run of the span carries the result, the rest are
                 # emptied.
                 paragraph.runs[start].text = result
                 for x in paragraph.runs[start + 1:end + 1]:
                     x.text = ''
     elif calc_kind == 2:
         for paragraph in obj.text_frame.paragraphs:
             if '{{' not in paragraph.text:
                 continue
             try:
                 result = exec_template(env, paragraph.text, calc_dict)
             except Exception as exc:
                 raise RuntimeError(paragraph.text) from exc
             # Copying the font directly fails, so blank every run and put the
             # rendered text into the first one to keep its formatting.
             for run in paragraph.runs:
                 run.text = ''
             paragraph.runs[0].text = result
     else:
         expr = obj.text_frame.text
         if '{{' in expr:
             try:
                 result = exec_template(env, expr, calc_dict)
             except Exception as exc:
                 # Report the expression itself; no paragraph is bound yet at
                 # this point.
                 raise RuntimeError(expr) from exc
             for paragraph in obj.text_frame.paragraphs:
                 for run in paragraph.runs:
                     run.text = ''
             first_paragraph = obj.text_frame.paragraphs[0]
             # The template may live in a later paragraph while the first one
             # has no run to receive the result.
             if len(first_paragraph.runs) == 0:
                 first_paragraph.add_run()
             first_paragraph.runs[0].text = result
Пример #2
0
def json_keyvalue_all(input_json, rootStack=None, ds_dict=None):
    """Render ``{{ ... }}`` templates found in a JSON-like structure, in place.

    Dict values and list items that are strings containing ``{{`` are replaced
    by the output of ``exec_template``. When an item of a list that is a dict
    value reports a replacement, that item is split on newlines and transposed
    into rows which replace the list from that item onwards (items after it
    are not kept).

    Args:
        input_json: dict, list or scalar to process.
        rootStack: Stack of ``(container, key_or_index)`` tuples describing
            where ``input_json`` lives. The entry pushed by the caller is
            popped when this call finishes. Defaults to a new empty stack.
        ds_dict: Variables handed to ``exec_template``. Defaults to an empty
            dict.

    Returns:
        True when a string item was rendered (directly or inside a list),
        otherwise False.
    """
    if rootStack is None:
        rootStack = []
    if ds_dict is None:
        ds_dict = {}
    try:
        env = get_jinja2_Environment()
        ret_val = False
        if isinstance(input_json, dict):
            for key in input_json.keys():
                key_value = input_json.get(key)
                if isinstance(key_value, dict):
                    rootStack.append((input_json, key))
                    # NOTE(review): returning here skips every key after the
                    # first dict-valued one - confirm this is intended.
                    return json_keyvalue_all(key_value, rootStack, ds_dict)
                elif isinstance(key_value, list):
                    for idx, json_array in enumerate(key_value):
                        rootStack.append((key_value, idx))
                        # The flag is evaluated per item so that a replacement
                        # in one list never leaks into the next list-valued
                        # key.
                        if json_keyvalue_all(json_array, rootStack, ds_dict):
                            # Split every cell on newlines and swap rows and
                            # columns, padding short cells with ''.
                            arr = [one.split('\n') for one in json_array]
                            max_len = max([len(x) for x in arr])
                            input_json[key] = key_value[0:idx] + [[
                                row[i] if len(row) > i else '' for row in arr
                            ] for i in range(max_len)]
                            break
                else:
                    if isinstance(key_value,
                                  str) and key_value.find("{{") >= 0:
                        print(str(key) + " = " + str(key_value))
                        input_json[key] = exec_template(
                            env, key_value, ds_dict)
        elif isinstance(input_json, list):
            for idx, input_json_array in enumerate(input_json):
                rootStack.append((input_json, idx))
                c_ret = json_keyvalue_all(input_json_array, rootStack, ds_dict)
                ret_val = ret_val or c_ret
            return ret_val
        else:
            if isinstance(input_json, str) and input_json.find("{{") >= 0:
                print(" 数组内: " + str(input_json))
                val = exec_template(env, input_json, ds_dict)
                # Write the rendered value back into the parent container,
                # which is the first element of the top stack entry.
                if rootStack and isinstance(rootStack[-1][0], (list, dict)):
                    parent, slot = rootStack[-1]
                    parent[slot] = val
                return True
    finally:
        # A top-level call may start with an empty stack; only pop the entry
        # the caller pushed for this value.
        if rootStack:
            rootStack.pop()
    return False
Пример #3
0
 def loop_one_txt(one_part, t_ds_dict, idx=0):
     """Render one text part, record the result and queue it for WeChat users.

     The plain text of ``one_part['txt']`` (HTML markup removed) is either
     handed to ``convert_html`` when it starts with ``http`` - the recorded
     result is then the URL of the generated page - or rendered directly with
     ``exec_template``.

     Args:
         one_part: Mapping with ``'txt'`` and ``'name'``; optional
             ``'wx_msg'`` holds a comma separated list of receivers.
         t_ds_dict: Variables for the template rendering.
         idx: Index embedded in the generated html file name.
     """
     expr_html = lxml.html.fromstring(one_part['txt']).text_content()
     if expr_html.startswith("http"):
         # NOTE: the "%#d" style flags are a Windows-only strftime extension.
         last_append = datetime.datetime.now().strftime("%#d%#H%#M%S")
         txt_tpl = f"http://hnapp.e-chinalife.com/weixin2/RedirctHandler2.aspx/637A7394-C8FE-4A8B-9D3A-7E7ADA492CE4/a{id}_{last_append}_{idx}.html"
         # text_content() already yields a string, which has no getText();
         # pass the text itself.
         # NOTE(review): confirm convert_html expects this text as its second
         # argument.
         convert_html(f"{upload_path}/../../tmp/html/a{id}_{last_append}_{idx}.html", expr_html, t_ds_dict)
     else:
         txt_tpl = exec_template(None, expr_html, t_ds_dict)
     tpl_results.append({
         'name': one_part['name'],
         "result": txt_tpl.replace('\n', '\n<br>'),
         "img": 'https://gw.alipayobjects.com/zos/rmsportal/WdGqmHpayyMjiEhcKoVE.png',
     })
     message = txt_tpl
     # Tolerate a 'wx_msg' key that is present but set to None.
     for wx_user in (one_part.get("wx_msg") or '').strip().split(","):
         if wx_user.strip() != '':
             wx_queue.put({'type': 'sendMessage', "wxid": wx_user, "content": message})
Пример #4
0
 def calc_cell(sheet):
     """Expand template cells of a worksheet and copy formulas down beside them.

     Only the first 101 rows of ``sheet.rows`` are processed. A string cell
     containing ``{{`` is rendered with ``exec_template``; the result is split
     into lines and whitespace separated tokens which are written into a grid
     whose top-left corner is the template cell. A formula cell that follows
     in the same row is translated into the rows below it, once for every
     additional line produced so far in that row.
     """
     for row_index, sheet_row in enumerate(sheet.rows):
         if row_index > 100:
             return
         # Number of sheet lines occupied by the latest expansion in this row.
         need_lines = 0
         for cell in sheet_row:
             value = cell.value
             if value is None:
                 continue
             if cell.data_type == 's' and '{{' in value:
                 # Template cell: spread the rendered text over the sheet.
                 rendered = exec_template(env, value, real_dict)
                 target_row = cell.row
                 first_col = cell.column
                 for line in rendered.split('\n'):
                     for offset, token in enumerate(line.split()):
                         target = sheet.cell(row=target_row,
                                             column=first_col + offset)
                         # Numbers of 14+ characters (e.g. staff ids) are kept
                         # as text.
                         if len(token) < 14 and is_number(token):
                             target.value = float(token)
                             target.data_type = 'n'
                         else:
                             target.value = token
                     target_row += 1
                 need_lines = target_row - cell.row
             elif cell.data_type == 'f' and value.startswith('='):
                 # Formula cell: replicate it into the expanded lines below.
                 target_row = cell.row + 1
                 for _ in range(1, need_lines):
                     coord = sheet.cell(row=target_row,
                                        column=cell.column).coordinate
                     sheet[coord] = Translator(
                         value,
                         origin=cell.coordinate).translate_formula(coord)
                     target_row += 1
                 need_lines = target_row - cell.row
Пример #5
0
def convert_file_for_txt(out_filename, template_file, ds_dict):
    '''Render a text template file and write the result to ``out_filename``.

    The template encoding is detected with ``chardet`` (falling back to
    utf-8 when detection yields nothing) and the same encoding is used for
    the output file. The directory of ``out_filename`` is created if missing.

    The output file is only opened after the template has been read and
    rendered, so converting a file in place works and a rendering failure
    does not leave an emptied output file behind.

    Args:
        out_filename: Path of the file to write.
        template_file: Path of the template to read.
        ds_dict: Variables handed to ``exec_template``.

    Returns:
        The rendered text.
    '''
    out_dir = os.path.split(os.path.realpath(out_filename))[0]
    os.makedirs(out_dir, exist_ok=True)

    with open(template_file, 'rb') as f:
        f_charInfo = chardet.detect(f.read())
    encoding = 'utf-8' if f_charInfo['encoding'] is None else f_charInfo[
        'encoding']

    with open(template_file, mode='r', encoding=encoding) as fr:
        res = fr.read()
    result = exec_template(None, res, ds_dict)

    # Mode 'w' already truncates, no explicit seek/truncate is needed.
    with open(out_filename, mode='w', encoding=encoding) as fw:
        fw.write(result)
    return result
Пример #6
0
def appendData_and_execLastSql(one_ds, ret, upload_path):
    """Merge the configured extra data into a data set and run its final SQL.

    Progress is reported through ``one_ds['exec_stat']``. The resulting frame
    (rounded to 2 decimals) is stored in ``ret[one_ds['name']]['data']`` and
    the column lists before/after the SQL step are stored in
    ``one_ds['after_append_columns']`` / ``one_ds['last_columns']``.

    Args:
        one_ds: Data set configuration. Keys read here: ``name``,
            ``key_column``, ``append`` (list of dicts with a ``from`` entry),
            ``sql`` and ``vis_sql_conf``.
        ret: Mapping of data set name to a dict holding ``'data'`` (a
            DataFrame) and ``'p'`` (its configuration).
        upload_path: Directory that relative ``from`` file names refer to.

    Raises:
        Exception: If the key column of an appended data set is not unique.
    """
    k = one_ds['name']
    v = ret[k]
    one_ds['exec_stat'] = "2:开始合并和执行最终sql"
    key_column = one_ds.get('key_column')
    if key_column is None:
        # Pick the first object-typed column whose values are all distinct.
        for key in v['data'].columns:
            if str(v['data'][key].dtype) == 'object' and len(v['data'][key].unique()) == len(v['data']):
                key_column = key
                break
    if key_column is None and not v['data'].empty:
        key_column = v['data'].columns[0]
    one_ds['key_column'] = key_column

    for one in one_ds.get('append', list()):
        # A missing or empty 'from' means there is nothing to merge.
        source = one.get('from') or ''
        one_ds['exec_stat'] = "3:开始合并" + source
        if source == '':
            continue
        elif source.find(".xlsx") > 0:
            data = pd.read_excel(os.path.join(upload_path, source))
            data[data.columns[0]] = data[data.columns[0]].astype(str)
            right_key_column = data.columns[0]
        elif source.find(".csv") > 0:
            data = pd_read_csv(os.path.join(upload_path, source))
            right_key_column = data.columns[0]
        elif source[0:2] in ['上次', '备份']:
            # "<kind>:<data set name>"; the name defaults to this data set.
            other = source.split(":")
            backup_name = other[1] if len(other) > 1 else k
            # NOTE(review): splitting on a backslash only isolates the last
            # path component on Windows - confirm against the code that
            # writes these backup files before changing it.
            rptid = os.path.realpath(upload_path).split("\\")[-1]
            qushu_date = datetime.date.today() + datetime.timedelta(days=-1)
            if source[0:2] == '上次':
                suffix = "上次"
            else:
                suffix = qushu_date.isoformat()
            bak_file = os.path.realpath(os.path.join(upload_path + "../../../过往数据/", f"{rptid}_{backup_name}_{suffix}"))
            if os.path.exists(f"{bak_file}.json"):
                with open(f"{bak_file}.json", 'r') as f:
                    data = f.read()
                    data = pd.read_json(data)
            elif not os.path.exists(f"{bak_file}.csv"):
                data = pd.DataFrame(columns=ret[backup_name]['data'].columns)
            else:
                data = pd_read_csv(f"{bak_file}.csv")
            if data.empty:
                data = pd.DataFrame(columns=ret[backup_name]['data'].columns)
            right_key_column = ret[backup_name]['p']['key_column']
        elif ret.get(source):
            data = ret[source]['data']
            right_key_column = ret[source]['p']['key_column']
        else:
            continue

        if right_key_column not in data.columns:
            right_key_column = data.columns[0]

        if v['data'].empty:
            # Nothing to merge into yet: the appended data becomes the data.
            v['data'] = data
            key_column = right_key_column
            continue
        # Drop rows without a key and compare keys as strings.
        data = data[(data[right_key_column] != '') & (data[right_key_column].isnull() == False)].reset_index(drop=True)
        data[right_key_column] = data[right_key_column].astype(str)
        if len(data[right_key_column].unique()) != len(data):
            raise Exception(f"数据集【{v['p']['name']}】 的合并数据集【{source}】的[{right_key_column}]列数据不唯一!")

        v['data'] = v['data'].merge(data, how="left", left_on=key_column, right_on=right_key_column, suffixes=('', f"_{source}")).fillna(0)

    one_ds['exec_stat'] = "4:合并成功,开始数据转换"
    data = v['data']
    one_ds['after_append_columns'] = list(data.columns)
    # Best effort: turn object columns located after the key column into
    # int, or failing that float; columns that cannot be converted stay as
    # they are.
    start_number = False
    for x in data.columns:
        if x == one_ds['key_column']:
            start_number = True
            continue
        if not start_number or data[x].dtype.name != 'object':
            continue
        for target_type in (int, float):
            try:
                data[x] = data[x].astype(target_type)
            except Exception:
                continue
            break
    one_ds['exec_stat'] = "5:开始执行最终sql"
    sql = one_ds.get('sql', '').strip()
    if sql != "":
        exec_sql = exec_template(None, sql, [])
        data = pandasql.sqldf(exec_sql, {key: value['data'] for key, value in ret.items()})
    vis_conf = one_ds.get('vis_sql_conf')
    if vis_conf is not None and vis_conf.get('expr', '').strip() != '':
        # SECURITY: eval() executes the configured expression as Python code;
        # the configuration must only come from trusted sources.
        data = eval(k + vis_conf['expr'], {k: data})
    v['data'] = data.round(2)
    one_ds['last_columns'] = data.columns.values.tolist()
    one_ds['exec_stat'] = "9:完成sql执行"
Пример #7
0
def convert_file_for_pptx(out_filename, template_file, ds_dict):
    '''Render a pptx template with ``ds_dict`` and write it to ``out_filename``.

    Steps:
      1. Unzip the template into ``<out_filename>t/pptx_tmp``.
      2. Convert every embedded ``.xlsx`` workbook in place with
         ``convert_file_for_xlsx`` and load it with pandas.
      3. For each embedded OLE workbook referenced by a slide, export the
         workbook as an image and convert it over the preview image the
         slide uses.
      4. Zip the directory to ``out_filename`` and remove the work directory.
      5. Open the result, render template expressions in text frames and
         tables, refresh chart data from the converted workbooks and save.
      6. Always save what has been rendered so far and call ``ppt2png``,
         even when rendering fails.

    Args:
        out_filename: Path of the pptx file to produce.
        template_file: Path of the pptx template.
        ds_dict: Variables available to the template expressions.

    Raises:
        RuntimeError: If a template expression cannot be rendered; the
            message contains the offending text.
    '''
    work_dir = out_filename + 't'
    unzip_path = os.path.join(work_dir, 'pptx_tmp')
    if os.path.exists(unzip_path):
        shutil.rmtree(unzip_path)
    unzip_single(template_file, unzip_path)
    embeddings_path = os.path.join(unzip_path, "ppt", "embeddings")
    tmp_pd_dict = {}
    tmp_excel_active_sheet_dict = {}
    if os.path.exists(embeddings_path):
        for x in os.listdir(embeddings_path):
            if x.endswith('.xlsx'):
                workbook_path = os.path.join(embeddings_path, x)
                active_name = convert_file_for_xlsx(
                    workbook_path,
                    workbook_path,
                    ds_dict,
                    outImage=False)
                tmp_excel_active_sheet_dict[x] = active_name
                tmp_pd_dict[x] = pd.read_excel(workbook_path)

    # Collect, per slide, which embedded workbook belongs to which preview
    # image by resolving the relationship ids of each OLE object.
    xlsx_emf_arr = []
    root_path = os.path.join(unzip_path, "ppt")
    slides_dir = os.path.join(root_path, "slides")
    for slide in os.listdir(slides_dir):
        if not slide.endswith(".xml"):
            continue
        with open(os.path.join(slides_dir, slide), 'rb') as slide_file:
            doc = lxml.etree.XML(slide_file.read())
        id_embed_dict = {}
        for one_oleObj in doc.xpath("//p:oleObj", namespaces=doc.nsmap):
            for one_blip in one_oleObj.xpath(".//a:blip",
                                             namespaces=doc.nsmap):
                rel_id = one_oleObj.attrib.get('{' + doc.nsmap['r'] + '}id')
                embed = one_blip.attrib.get('{' + doc.nsmap['r'] + '}embed')
                id_embed_dict[rel_id] = embed
        if len(id_embed_dict) > 0:
            rels_path = os.path.join(slides_dir, "_rels", f"{slide}.rels")
            with open(rels_path, 'rb') as rels_file:
                rels = lxml.etree.XML(rels_file.read())
            for rel_id, embed in id_embed_dict.items():
                xlsx = rels.xpath(
                    f"//*[local-name() = 'Relationship'][@Id='{rel_id}'] "
                )[0].attrib['Target']
                emf = rels.xpath(
                    f"//*[local-name() = 'Relationship'][@Id='{embed}'] "
                )[0].attrib['Target']
                xlsx_emf_arr.append({"xlsx": xlsx, "emf": emf, "slide": slide})
    for one in xlsx_emf_arr:
        png_file = os.path.realpath(root_path + "/slides/" + one['xlsx'] +
                                    "1.png")
        emf_file = os.path.realpath(root_path + "/slides/" + one['emf'])
        excel2img.export_img(
            root_path + "/slides/" + one['xlsx'], png_file,
            tmp_excel_active_sheet_dict[one['xlsx'].split("/")[-1]])
        # NOTE: this is a shell command line built from file paths; the
        # paths must not contain double quotes.
        my_cmd = f'convert "{png_file}" "{emf_file}"'
        # Reading the output waits for the command; the context manager
        # closes the pipe afterwards.
        with os.popen(my_cmd) as pipe:
            pipe.readlines()
        os.remove(png_file)

    zipDir(unzip_path, out_filename)
    shutil.rmtree(work_dir)

    env = get_jinja2_Environment()
    ppt_file = Presentation(out_filename)

    def calc_frame_txt(obj, calc_dict, calc_kind=None):
        """Render the template expressions inside a shape's text frame.

        ``calc_kind`` 3 renders each expression in the runs it spans, 2
        renders paragraph by paragraph, any other value renders the whole
        frame text at once. Raises RuntimeError carrying the template text
        when rendering fails.
        """
        if calc_kind is None:
            calc_kind = 1 if len(obj.text_frame.paragraphs) < 1 else 3
        if calc_kind == 3:
            # The frame may mix differently formatted runs, so an expression
            # can be split over several runs; locate each expression's span.
            for paragraph in obj.text_frame.paragraphs:
                if '{{' not in paragraph.text:
                    continue
                exp_list = []
                start, end, s_num, e_num = -1, -1, 0, 0
                for idx, run in enumerate(paragraph.runs):
                    if '{{' in run.text:
                        s_num += 1
                        if s_num == 1:
                            start = idx
                    if '}}' in run.text:
                        end = idx
                        e_num += 1
                    if start >= 0 and end >= 0 and s_num == e_num:
                        exp_list.append((start, end))
                        start, end, s_num, e_num = -1, -1, 0, 0
                for start, end in exp_list:
                    if not (start >= 0 and end >= 0 and start <= end):
                        continue
                    text = ''.join(
                        [x.text for x in paragraph.runs[start:end + 1]])
                    try:
                        result = exec_template(env, text, calc_dict)
                    except Exception as exc:
                        raise RuntimeError(text) from exc
                    paragraph.runs[start].text = result
                    for x in paragraph.runs[start + 1:end + 1]:
                        x.text = ''
        elif calc_kind == 2:
            for paragraph in obj.text_frame.paragraphs:
                if '{{' not in paragraph.text:
                    continue
                try:
                    result = exec_template(env, paragraph.text, calc_dict)
                except Exception as exc:
                    raise RuntimeError(paragraph.text) from exc
                # Copying the font directly fails, so blank every run and
                # put the rendered text into the first one to keep its
                # formatting.
                for run in paragraph.runs:
                    run.text = ''
                paragraph.runs[0].text = result
        else:
            expr = obj.text_frame.text
            if '{{' in expr:
                try:
                    result = exec_template(env, expr, calc_dict)
                except Exception as exc:
                    # No paragraph is bound yet here; report the expression.
                    raise RuntimeError(expr) from exc
                for paragraph in obj.text_frame.paragraphs:
                    for run in paragraph.runs:
                        run.text = ''
                first_paragraph = obj.text_frame.paragraphs[0]
                if len(first_paragraph.runs) == 0:
                    first_paragraph.add_run()
                first_paragraph.runs[0].text = result

    def handle_all_shapes(shapes, real_dict, tmp_pd_dict):
        """Render every shape of ``shapes``, descending into grouped shapes.

        ``real_dict`` carries the template variables down the recursion;
        ``tmp_pd_dict`` holds the data of the embedded workbooks keyed by
        file name and is passed explicitly for the same reason.
        """
        for shape in shapes:
            if hasattr(shape, "shapes"):
                handle_all_shapes(shape.shapes, real_dict, tmp_pd_dict)
                continue
            if shape.has_text_frame:
                calc_frame_txt(shape, real_dict)
            elif shape.has_chart:
                key = shape.chart._workbook.xlsx_part.partname.split("/")[-1]
                # First column supplies the categories, every other column
                # one series.
                chart_data = ChartData()
                columns = list(tmp_pd_dict[key].columns.values)
                chart_data.categories = tmp_pd_dict[key][columns[0]]
                for one in columns[1:]:
                    chart_data.add_series(one, tuple(tmp_pd_dict[key][one]))
                shape.chart.replace_data(chart_data)
            elif shape.has_table:
                table = shape.table
                for current_row, row in enumerate(table.rows):
                    for current_col, cell in enumerate(row.cells):
                        template = cell.text_frame.text
                        if '{{' not in template:
                            continue
                        try:
                            result = exec_template(env, template, real_dict)
                        except Exception as exc:
                            raise RuntimeError(template) from exc
                        # Blank the runs instead of replacing them so the
                        # cell formatting is kept.
                        for paragraph in cell.text_frame.paragraphs:
                            for run in paragraph.runs:
                                run.text = ''
                        # Spread the result over the table starting at the
                        # template cell: one table row per line, one cell
                        # per whitespace separated token.
                        copy_row = current_row
                        for one_line in result.split('\n'):
                            cur_row_cells = table.rows[copy_row].cells
                            copy_col = current_col
                            for one in one_line.split():
                                # Not enough columns: drop the rest of the
                                # line.
                                if copy_col >= len(cur_row_cells):
                                    break
                                p_cell = cur_row_cells[copy_col]
                                first_paragraph = p_cell.text_frame.paragraphs[0]
                                if len(first_paragraph.runs) == 0:
                                    first_paragraph.add_run()
                                first_paragraph.runs[0].text = one
                                copy_col = copy_col + 1
                            copy_row = copy_row + 1
                            # Not enough rows: drop the remaining lines.
                            if copy_row >= len(table.rows):
                                break

    try:
        real_dict = ds_dict.copy()
        for slide in ppt_file.slides:
            # Variable definitions ("name = expression", one per line) are
            # taken from the slide notes; later slides override earlier ones.
            if slide.has_notes_slide:
                notes_text = slide.notes_slide.notes_text_frame.text
                for one_line in notes_text.split("\n"):
                    # Split on the first '=' only so that the expression
                    # itself may contain '='.
                    var_expr = one_line.split("=", 1)
                    if len(var_expr) < 2:
                        continue
                    try:
                        if var_expr[1].strip().startswith("{{"):
                            result_lines = exec_template(
                                env, var_expr[1], real_dict)
                        else:
                            result_lines = exec_template(
                                env, "{{" + var_expr[1] + "}}", real_dict)
                        real_dict = real_dict.copy()
                        real_dict[var_expr[0].strip()] = result_lines
                    except Exception as e:
                        raise RuntimeError("\n备注说明中的公式不正确:" + one_line) from e

            handle_all_shapes(slide.shapes, real_dict, tmp_pd_dict)
    finally:
        # Save whatever has been rendered, also on failure, then release the
        # presentation before the images are generated.
        ppt_file.save(out_filename)
        del ppt_file
        ppt2png(out_filename, ds_dict.get("_idx_", ''))
Пример #8
0
 def handle_all_shapes(shapes, real_dict, tmp_pd_dict):
     """Render every shape of ``shapes``, descending into grouped shapes.

     Text frames are rendered by ``calc_frame_txt``, charts get their data
     replaced from ``tmp_pd_dict`` and table cells containing ``{{`` are
     rendered and spread over the table.

     Args:
         shapes: Iterable of shapes; an item with a ``shapes`` attribute is
             processed recursively.
         real_dict: Template variables, passed down the recursion.
         tmp_pd_dict: Tabular data of the embedded workbooks, keyed by the
             last segment of the chart workbook's part name (presumably
             pandas DataFrames - verify against the caller).

     Raises:
         RuntimeError: If a table cell cannot be rendered; the message is the
             cell text and the original error is chained.
     """
     for shape in shapes:
         if hasattr(shape, "shapes"):
             handle_all_shapes(shape.shapes, real_dict, tmp_pd_dict)
             continue
         if shape.has_text_frame:
             calc_frame_txt(shape, real_dict)
         elif shape.has_chart:
             key = shape.chart._workbook.xlsx_part.partname.split("/")[-1]
             # First column supplies the categories, every other column one
             # series.
             chart_data = ChartData()
             columns = list(tmp_pd_dict[key].columns.values)
             chart_data.categories = tmp_pd_dict[key][columns[0]]
             for one in columns[1:]:
                 chart_data.add_series(one, tuple(tmp_pd_dict[key][one]))
             shape.chart.replace_data(chart_data)
         elif shape.has_table:
             table = shape.table
             for current_row, row in enumerate(table.rows):
                 for current_col, cell in enumerate(row.cells):
                     template = cell.text_frame.text
                     if '{{' not in template:
                         continue
                     try:
                         result = exec_template(env, template, real_dict)
                     except Exception as exc:
                         raise RuntimeError(template) from exc
                     # Blank the runs instead of replacing them so the cell
                     # formatting is kept.
                     for paragraph in cell.text_frame.paragraphs:
                         for run in paragraph.runs:
                             run.text = ''
                     # Spread the result over the table starting at the
                     # template cell: one table row per line, one cell per
                     # whitespace separated token.
                     copy_row = current_row
                     for one_line in result.split('\n'):
                         cur_row_cells = table.rows[copy_row].cells
                         copy_col = current_col
                         for one in one_line.split():
                             # Not enough columns: drop the rest of the line.
                             if copy_col >= len(cur_row_cells):
                                 break
                             p_cell = cur_row_cells[copy_col]
                             first_paragraph = p_cell.text_frame.paragraphs[0]
                             if len(first_paragraph.runs) == 0:
                                 first_paragraph.add_run()
                             first_paragraph.runs[0].text = one
                             copy_col = copy_col + 1
                         copy_row = copy_row + 1
                         # Not enough rows: drop the remaining lines.
                         if copy_row >= len(table.rows):
                             break