def MergedWorkbookClassification(list_xls_path,num_head_rows,num_head_columns):
    
    print('')
    print('--Merged Workbook Classification')
    
    plt.style.use('ggplot')
    
    #construct output folder path
    tables_output_folder=list_xls_path[0].split('input')[0]+'output\\颗分汇总\\分类\\'
    figures_output_folder=list_xls_path[0].split('input')[0]+'output\\颗分汇总\\分类\\图\\总图\\'
        
    #generate output folder
    O_P.GenerateFolder(tables_output_folder)
    O_P.GenerateFolder(figures_output_folder)
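    #(assumption: O_P.GenerateFolder is a thin helper around
    # os.makedirs(path,exist_ok=True), creating the folder when missing)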
    
    #DF channels
    total_channels=[]
    
    for this_xls_path in list_xls_path:
        
        #open the excel sheet to be operated on
        #formatting_info: keep the header format
        workbook=xlrd.open_workbook(this_xls_path,formatting_info=True)
        
        #fetch all sheet names
        list_sheet_names=list(workbook.sheet_names())
     
        #traverse all sheets
        for this_sheet_name in list_sheet_names:
            
            #Data Frame object
            that_channel=pd.read_excel(this_xls_path,sheet_name=this_sheet_name)
            
            #collect it
            total_channels.append(that_channel)
     
    title_list=['粉土密实度分类',
                '粉土湿度分类',
                '黏性土状态分类',
                '土的分类',
                '备注',
                '砂类土分类(代号)',
                '砾类土分类(代号)',
                '砂类土分类(名称)',
                '砾类土分类(名称)']
    
    #classification result list
    classification_ω0=[]
    classification_e0=[]
    classification_IL=[]
    classification_GB=[]
    classification_note=[]
    classification_S_type=[]
    classification_G_type=[]
    classification_S_code=[]
    classification_G_code=[]
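
    #symbol key: ω0 = natural moisture content, e0 = initial void ratio,
    #IL = liquidity index, GB = soil classification under the national standard (国标)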
    
    #traverse all sheets
    for channel in total_channels:
    
        print('')
        print('...')
        print('......')
        print('')

        final_head_columns,unit_list=O_H_C.HeadColumnsGeneration(channel,num_head_rows)
        
        #all info of dataframe
        value_matrix=channel.values
        
        #delete the repetition
        index_valid=O_L.ValidIndexList(value_matrix[num_head_rows:,1])
        
        print('-->Valid Samples:',len(index_valid))
        
        for k in range(len(final_head_columns)):
            
            this_head=final_head_columns[k]
            
            #search for note and make statistics
            if '备' in this_head or '注' in this_head:
                
                list_note=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_note=this_head
                
                print('-->head:'+head_note)
                
            #search for type of silt
            if '分类' in this_head:
                
                list_GB=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_GB=this_head
                
                print('-->head:'+head_GB)
                
            #search for void ratio
            if 'e0' in this_head:
     
                list_e0=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_e0=this_head
                
                print('-->head:'+head_e0)
             
            #search for moisture content
            if 'ω0' in this_head:
                
                list_ω0=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_ω0=this_head 
                
                print('-->head:'+head_ω0)
                
            #search for liquidity index
            if 'IL' in this_head:
                
                list_IL=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_IL=this_head
                
                print('-->head:'+head_IL)
        
        #delete the repetition and remove label R
        index_valid=O_L.ListWithR(value_matrix[num_head_rows:,1])
     
        print('-->Total Samples:',len(value_matrix[num_head_rows:,1]))
        print('-->Valid Samples:',len(index_valid))
        
        #partition index list
        list_partition_index=[]
        
        for k in range(num_head_columns,np.shape(value_matrix)[1]):
    
            #title str
            title=final_head_columns[k]
            
    #        print(k,title)
        
            if '颗' in title and '粒' in title and '分' in title and '析' in title:
                
                print('-->',title)
                     
                list_partition_index.append(k)
            
            if '不' in title and '均' in title and '匀' in title:
                
                print('-->',title)
                
                data_Cu=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                
            if '曲' in title and '率' in title:
                
                print('-->',title)
                
                data_Ce=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                
            if '分' in title and '类' in title:
                
                print('-->',title)
                
                data_GB=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
            
    #    print(list_partition_index)
        
        #indices of samples needing grain-size partition
        index_partition=O_L.GBIndexPartition(data_GB)
         
        #matrix to contain grain partition proportion
        data_partition=np.zeros((len(index_partition),len(list_partition_index)))
        
        column=0
        
        for this_index in list_partition_index:
            
            data_partition[:,column]=O_L.CustomIndexList(list(value_matrix[num_head_rows:,this_index]),index_partition)
        
            column+=1
        
        #valid part
        GB_partition=O_L.CustomIndexList(data_GB,index_partition)
        Cu_partition=O_L.CustomIndexList(data_Cu,index_partition)
        Ce_partition=O_L.CustomIndexList(data_Ce,index_partition)
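        #Cu and Ce are the grading indices taken from the sheet:
        #Cu = d60/d10 (coefficient of uniformity) and Ce (often written Cc)
        #= d30**2/(d10*d60) (coefficient of curvature); grain.Classification
        #presumably uses them to separate well-graded from poorly graded soils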
        
    #        len(index_valid)
        
        #classification result
        S_classification_type=[]
        G_classification_type=[]
        S_classification_code=[]
        G_classification_code=[]
        
        for kk in range(len(index_partition)):
                  
            #construct new object
            this_grain=C_F_V.grain()
            
            this_grain.silt_type=GB_partition[kk]
            this_grain.InitMap(list(data_partition[kk,:]))   
            
            this_grain.Partition()
            this_grain.Classification(Cu_partition[kk],Ce_partition[kk])
            
            if '砂' in this_grain.silt_type:
                
                S_classification_type.append(this_grain.classification_type)
                S_classification_code.append(this_grain.classification_code)
                
            if '砾' in this_grain.silt_type:
                
                G_classification_type.append(this_grain.classification_type)
                G_classification_code.append(this_grain.classification_code)
            
        #filter floury soil
        index_floury_soil=O_L.GBIndexFlourySoil(list_GB)
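        #(the silt moisture/compactness classes below apply only to 粉土 rows,
        # hence this filter)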

        ω0_valid=O_L.CustomIndexList(list_ω0,index_floury_soil)
        e0_valid=O_L.CustomIndexList(list_e0,index_floury_soil)

        #filter cohesive silt
        index_cohesive_silt=O_L.GBIndexCohesiveSilt(list_GB)

        IL_valid=O_L.CustomIndexList(list_IL,index_cohesive_silt)
     
        #list of classification result
        #floury soil
        classification_ω0+=SiltMoistureClassification(ω0_valid,num_head_rows)
        classification_e0+=SiltCompactnessClassification(e0_valid,num_head_rows)
        
        #cohesive silt
        classification_IL+=ClayeySiltStateClassification(IL_valid,num_head_rows)
        
        #GB
        classification_GB+=list_GB
        
        #note
        classification_note+=list_note
        
#        print(len(classification_GB),len(classification_note))
        
        #grain partition result
        classification_S_type+=S_classification_type
        classification_G_type+=G_classification_type
        classification_S_code+=S_classification_code
        classification_G_code+=G_classification_code
        
    #collect them into list
    classification_list=[classification_e0,
                         classification_ω0,
                         classification_IL,
                         classification_GB,
                         classification_note,
                         classification_S_type,
                         classification_G_type,
                         classification_S_code,
                         classification_G_code]
        
    #delete blank list
    real_title_list=O_L.CustomIndexList(title_list,O_L.DeleteBlankList(classification_list))
    real_classification_list=O_L.CustomIndexList(classification_list,O_L.DeleteBlankList(classification_list))
    
    #delete nan in classification list
    new_classification_list=[]
    
    for this_classification in real_classification_list:
        
        new_classification=[]
    
        for item in this_classification:
            
            if not isinstance(item,str):
                
                if np.isnan(item):
                    
#                    print('nan')
                    
                    continue
                
            new_classification.append(item)
            
        new_classification_list.append(new_classification)
        
    #construct a map between title and classification result
    map_title_classification=dict(zip(real_title_list,new_classification_list))
    
    #statistics result tables of classification
    TitleAndClassification2Table(map_title_classification,tables_output_folder)
    
    #statistics result figures of classification
    ClassificationStatistics(map_title_classification,figures_output_folder)
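
#SiltMoistureClassification, SiltCompactnessClassification and
#ClayeySiltStateClassification are defined elsewhere; a minimal sketch of the
#last one, assuming the usual GB 50021 liquidity-index bands and omitting
#nan handling (num_head_rows kept only to mirror the real signature):
#
#    def ClayeySiltStateClassification(list_IL,num_head_rows):
#
#        result=[]
#
#        for IL in list_IL:
#
#            if IL<=0:
#                result.append('坚硬')
#            elif IL<=0.25:
#                result.append('硬塑')
#            elif IL<=0.75:
#                result.append('可塑')
#            elif IL<=1:
#                result.append('软塑')
#            else:
#                result.append('流塑')
#
#        return result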
def SheetsClassification(xls_path,num_head_rows,num_head_columns,list_num_head_columns=None):
    
    print('')
    print('--Sheets Classification')
    
    plt.style.use('ggplot')
    
    #open the excel sheet to be operated on
    #formatting_info: keep the header format
    workbook=xlrd.open_workbook(xls_path,formatting_info=True)
    
    #copy former workbook
    new_workbook=copy(workbook)
        
    #construct output folder path
    tables_output_folder=xls_path.replace('.xls','').replace('input','output')+'\\分类\\'
    
    #generate output folder
    O_P.GenerateFolder(tables_output_folder)
    
    #save as
    new_workbook.save(tables_output_folder+'分类结果.xls')
    
    #fetch all sheet names
    list_sheet_names=list(workbook.sheet_names())
    
    #default
    if list_num_head_columns is None:
        
        list_num_head_columns=[num_head_columns]*len(list_sheet_names)
        
    map_sheet_names_num_head_columns=dict(zip(list_sheet_names,list_num_head_columns))    
    
    title_list=['粉土密实度分类',
                '粉土湿度分类',
                '黏性土状态分类',
                '土的分类',
                '备注']
    
    #traverse all sheets
    for this_sheet_name in workbook.sheet_names():
    
        #open a sheet
        this_sheet=new_workbook.get_sheet(this_sheet_name) 
        
        print('')
        print('...')
        print('......')
        print('->sheet name:',this_sheet_name)
        
        #construct output folder path
        figures_output_folder=xls_path.replace('.xls','').replace('input','output')+'\\分类\\图\\表 '+this_sheet_name+'\\'
        
        #generate output folder
        O_P.GenerateFolder(figures_output_folder)
        O_P.GenerateFolder(tables_output_folder)
        
        #Data Frame object
        channel=pd.read_excel(xls_path,sheet_name=this_sheet_name)
        
        final_head_columns,unit_list=O_H_C.HeadColumnsGeneration(channel,num_head_rows)
        
        #all info of dataframe
        value_matrix=channel.values
        
        #delete the repetition
        index_valid=O_L.ValidIndexList(value_matrix[num_head_rows:,1])
        
        #index of line where info starts
        start_info_row=num_head_rows+1   
             
        for k in range(len(final_head_columns)):
            
            this_head=final_head_columns[k]
            
            #search for note and make statistics
            if '备' in this_head or '注' in this_head:
                
                list_note=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_note=this_head
                
                print('-->head:'+head_note)
                
            #search for type of silt
            if '分类' in this_head:
                
                list_GB=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_GB=this_head
                
                print('-->head:'+head_GB)
                
            #search for void ratio
            if 'e0' in this_head:
     
                list_e0=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_e0=this_head
                
                print('-->head:'+head_e0)
             
            #search for moisture content
            if 'ω0' in this_head:
                
                list_ω0=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_ω0=this_head 
                
                print('-->head:'+head_ω0)
                
            #search for liquidity index
            if 'IL' in this_head:
                
                list_IL=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_IL=this_head
                
                print('-->head:'+head_IL)
                
        #list of classification result
        classification_ω0=SiltMoistureClassification(list_ω0,num_head_rows)
        classification_e0=SiltCompactnessClassification(list_e0,num_head_rows)
        classification_IL=ClayeySiltStateClassification(list_IL,num_head_rows)
        classification_GB=cp.deepcopy(list_GB)
        classification_note=cp.deepcopy(list_note)
        
        #collect them into list
        classification_list=[classification_e0,
                             classification_ω0,
                             classification_IL,
                             classification_GB,
                             classification_note]
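
        #List2FrequencyMap is defined elsewhere; a minimal sketch, assuming it
        #simply counts occurrences of each value in a list:
        #
        #    from collections import Counter
        #
        #    def List2FrequencyMap(list_data):
        #        return dict(Counter(list_data))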
        #frequency to save
        list_frequency_map=[List2FrequencyMap(classification_list[ix]) for ix in range(len(title_list))]
        
        #construct a separate workbook for the statistics table
        #(reusing new_workbook here would clobber the copied workbook
        #that receives the per-row classification results)
        stats_workbook=xlwt.Workbook(encoding='utf-8')
        
        #construct new sheet
        new_sheet=stats_workbook.add_sheet("总表")
              
        #define the border style
        borders = xlwt.Borders()
        borders.left = 1
        borders.right = 1
        borders.top = 1
        borders.bottom = 1
        borders.bottom_colour=0x3A    
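        #(1 is xlwt's THIN line style; bottom_colour is a palette colour index)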
         
        style = xlwt.XFStyle()
        style.borders = borders
        
        #current row index
        row=0
        
        #title
        for k in range(len(title_list)):
            
            new_sheet.write(row,0,title_list[k],style)
            
            row+=1
            
            new_sheet.write(row,0,'总量',style)
            new_sheet.write(row,1,len(classification_list[k]),style)
            
            row+=1
            
#            print(list_frequency_map[k])
            
            for kk in range(len(list_frequency_map[k])):
                
                if isinstance(list(list_frequency_map[k].keys())[kk],str):
                    
                    new_sheet.write(row,0,list(list_frequency_map[k].keys())[kk],style)
                    
                else:
                    
                    new_sheet.write(row,0,'其他',style)
                    
                new_sheet.write(row,1,list(list_frequency_map[k].values())[kk],style)
                
                row+=1
                
            row+=1
                
        stats_workbook.save(tables_output_folder+'统计总表.xls')
    
        #column offset
        num_columns_plus=map_sheet_names_num_head_columns[this_sheet_name]-num_head_columns
        
        #write table head
        for this_title in title_list:
            
            num_columns_plus+=1
            
            this_sheet.write(num_head_rows,
                             np.shape(channel.values)[1]+num_columns_plus,
                             this_title,
                             style)
            
        #column offset
        num_columns_plus=map_sheet_names_num_head_columns[this_sheet_name]-num_head_columns  
        
        #write classification result    
        for this_classification in classification_list:
            
            num_columns_plus+=1
            
            for i in range(len(this_classification)):
                  
                this_sheet.write(i+start_info_row,
                                 np.shape(channel.values)[1]+num_columns_plus,
                                 this_classification[i],
                                 style)      
    
        #save as
        new_workbook.save(tables_output_folder+'分类结果.xls')
        
        #delete blank list
        real_title_list=O_L.CustomIndexList(title_list,O_L.DeleteBlankList(classification_list))
        real_classification_list=O_L.CustomIndexList(classification_list,O_L.DeleteBlankList(classification_list))
        
        #delete nan in classification list
        new_classification_list=[]
        
        for this_classification in real_classification_list:
            
            new_classification=[]
        
            for item in this_classification:
                
                if not isinstance(item,str):
                    
                    if np.isnan(item):
                        
    #                    print('nan')
                        
                        continue
                    
                new_classification.append(item)
                
            new_classification_list.append(new_classification)
            
        #construct a map between title and classification result
        map_title_classification=dict(zip(real_title_list,new_classification_list))
        
        #statistics result tables of classification
        TitleAndClassification2Table(map_title_classification,tables_output_folder)
        
        #statistics result figures of classification
        ClassificationStatistics(map_title_classification,figures_output_folder)
def WorkbookClassification(xls_path,num_head_rows,num_head_columns):
    
    print('')
    print('--Workbook Classification')
    
    plt.style.use('ggplot')
    
    #open the excel sheet to be operated on
    #formatting_info: keep the header format
    workbook=xlrd.open_workbook(xls_path,formatting_info=True)
    
    #construct output folder path
    tables_output_folder=xls_path.replace('.xls','').replace('input','output')+'\\分类\\'
    figures_output_folder=xls_path.replace('.xls','').replace('input','output')+'\\分类\\图\\总图\\'
    
    #generate output folders
    O_P.GenerateFolder(tables_output_folder)
    O_P.GenerateFolder(figures_output_folder)
  
    #fetch all sheet names
    list_sheet_names=list(workbook.sheet_names())
         
    title_list=['粉土密实度分类',
                '粉土湿度分类',
                '黏性土状态分类',
                '土的分类',
                '备注']
    
    #classification result list
    classification_ω0=[]
    classification_e0=[]
    classification_IL=[]
    classification_GB=[]
    classification_note=[]
    
    #traverse the last sheet only
    for this_sheet_name in list_sheet_names[-1:]:
            
        print('')
        print('...')
        print('......')
        print('->sheet name:',this_sheet_name)
        
        #Data Frame object
        channel=pd.read_excel(xls_path,sheet_name=this_sheet_name)
        
        final_head_columns,unit_list=O_H_C.HeadColumnsGeneration(channel,num_head_rows)
        
        #all info of dataframe
        value_matrix=channel.values
        
        #delete the repetition
        index_valid=O_L.ValidIndexList(value_matrix[num_head_rows:,1])
        
        print('-->Valid Samples:',len(index_valid))
        
        for k in range(len(final_head_columns)):
            
            this_head=final_head_columns[k]
            
            #search for note and make statistics
            if '备' in this_head or '注' in this_head:
                
                list_note=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_note=this_head
                
                print('-->head:'+head_note)
                
            #search for type of silt
            if '分类' in this_head:
                
                list_GB=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_GB=this_head
                
                print('-->head:'+head_GB)
                
            #search for void ratio
            if 'e0' in this_head:
     
                list_e0=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_e0=this_head
                
                print('-->head:'+head_e0)
             
            #search for moisture content
            if 'ω0' in this_head:
                
                list_ω0=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_ω0=this_head 
                
                print('-->head:'+head_ω0)
                
            #search for liquidity index
            if 'IL' in this_head:
                
                list_IL=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_IL=this_head
                
                print('-->head:'+head_IL)

        #filter floury soil
        index_floury_soil=O_L.GBIndexFlourySoil(list_GB)
        
        ω0_valid=O_L.CustomIndexList(list_ω0,index_floury_soil)
        e0_valid=O_L.CustomIndexList(list_e0,index_floury_soil)

        #filter cohesive silt
        index_cohesive_silt=O_L.GBIndexCohesiveSilt(list_GB)
        
        IL_valid=O_L.CustomIndexList(list_IL,index_cohesive_silt)
     
        #list of classification result
        #floury soil
        classification_ω0+=SiltMoistureClassification(ω0_valid,num_head_rows)
        classification_e0+=SiltCompactnessClassification(e0_valid,num_head_rows)
        
        #cohesive silt
        classification_IL+=ClayeySiltStateClassification(IL_valid,num_head_rows)
        
        #GB
        classification_GB+=list_GB
        
        #note
        classification_note+=list_note
        
    #collect them into list
    classification_list=[classification_e0,
                         classification_ω0,
                         classification_IL,
                         classification_GB,
                         classification_note]
    #frequency to save
    list_frequency_map=[List2FrequencyMap(classification_list[ix]) for ix in range(len(title_list))]
    
    #construct new workbook   
    new_workbook=xlwt.Workbook(encoding='utf-8') 
    
    #construct new sheet
    new_sheet=new_workbook.add_sheet("总表")          
          
    #define the border style
    borders = xlwt.Borders()
    borders.left = 1
    borders.right = 1
    borders.top = 1
    borders.bottom = 1
    borders.bottom_colour=0x3A    
     
    style = xlwt.XFStyle()
    style.borders = borders
    
    #current row index
    row=0
    
    #title
    for k in range(len(title_list)):
        
        new_sheet.write(row,0,title_list[k],style)
        
        row+=1
        
        new_sheet.write(row,0,'总量',style)
        new_sheet.write(row,1,len(classification_list[k]),style)
        
        row+=1
        
#        print(list_frequency_map[k])
        
        for kk in range(len(list_frequency_map[k])):
            
            if isinstance(list(list_frequency_map[k].keys())[kk],str):
                
                new_sheet.write(row,0,list(list_frequency_map[k].keys())[kk],style)
                
            else:
                
                new_sheet.write(row,0,'其他',style)
                
            new_sheet.write(row,1,list(list_frequency_map[k].values())[kk],style)
            
            row+=1
            
        row+=1
            
    new_workbook.save(tables_output_folder+'统计总表.xls')
    
    #delete blank list
    real_title_list=O_L.CustomIndexList(title_list,O_L.DeleteBlankList(classification_list))
    real_classification_list=O_L.CustomIndexList(classification_list,O_L.DeleteBlankList(classification_list))
    
    #delete nan in classification list
    new_classification_list=[]
    
    for this_classification in real_classification_list:
        
        new_classification=[]
    
        for item in this_classification:
            
            if not isinstance(item,str):
                
                if np.isnan(item):
                    
#                    print('nan')
                    
                    continue
                
            new_classification.append(item)
            
        new_classification_list.append(new_classification)
        
    #construct a map between title and classification result
    map_title_classification=dict(zip(real_title_list,new_classification_list))
    
    #statistics result tables of classification
    TitleAndClassification2Table(map_title_classification,tables_output_folder)
    
    #statistics result figures of classification
    ClassificationStatistics(map_title_classification,figures_output_folder)
def WorkbookConsolidation(xls_path, num_head_rows, num_head_columns):

    print('')
    print('--Workbook Consolidation')

    #plt.style.use('ggplot')

    #construct output folder path
    figures_output_folder = xls_path.replace('.xls', '').replace(
        'input', 'output') + '\\'

    #pressure ranges used as sub-folder names
    list_threshold = [
        '0-100', '100-200', '200-400', '400-800', '800-1600', '1600-3200'
    ]

    for this_threshold in list_threshold:

        O_P.GenerateFolder(figures_output_folder + '先期固结压力\\' +
                           this_threshold + '\\')

    #open the excel sheet to be operated on
    #formatting_info: keep the header format
    workbook = xlrd.open_workbook(xls_path, formatting_info=True)

    #fetch all sheet names
    list_sheet_names = list(workbook.sheet_names())

    #data throughout workbook
    Pc_high_pressure_workbook = []

    #traverse all sheets
    for this_sheet_name in list_sheet_names:

        print('')
        print('...')
        print('......')
        print('->sheet name:', this_sheet_name)
        print('')

        #Data Frame object
        channel = pd.read_excel(xls_path, sheet_name=this_sheet_name)

        final_head_columns, unit_list = O_H_C.HeadColumnsGeneration(
            channel, num_head_rows)

        #print(final_head_columns)

        #all info of dataframe
        value_matrix = channel.values

        #delete the repetition
        index_valid = O_L.ValidIndexList(value_matrix[num_head_rows:, 1])

        #fetch the id of P and e
        index_e_high = []

        #pressure
        P_high = []

        index_list = [1, 2, 3]

        #hole id, start depth, end depth
        list_hole_id,\
        list_start_depth,\
        list_end_depth=[O_L.CustomIndexList(list(value_matrix[num_head_rows:,this_index]),index_valid) for this_index in index_list]

        for k in range(num_head_columns, np.shape(value_matrix)[1]):

            #title str
            title = final_head_columns[k]

            if '各级压力下的孔隙比' in title and '高压固结' in title:

                print(k, title)

                index_e_high.append(k)
                P_high.append(float(title.strip().split(' ')[1]))
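                #(assumption: the merged header resembles
                # '高压固结 100 各级压力下的孔隙比', so the pressure in kPa is
                # the second space-separated token of the title)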

        #matrix to contain the void ratio under each pressure level
        data_e_high = np.zeros((len(index_valid), len(index_e_high)))

        column = 0

        for this_index in index_e_high:

            data_e_high[:, column] = O_L.CustomIndexList(
                list(value_matrix[num_head_rows:, this_index]), index_valid)

            column += 1

        Pc_high_pressure = []

        #high pressure
        for i in range(np.shape(data_e_high)[0]):

            expire_nan_index_list = O_L.ExpireNanIndexList(data_e_high[i])

            this_e = O_L.CustomIndexList(list(data_e_high[i]),
                                         expire_nan_index_list)
            this_P = O_L.CustomIndexList(P_high, expire_nan_index_list)

            #construct new data object
            that_data = data()

            that_data.hole_id = list_hole_id[i]
            that_data.end_depth = list_end_depth[i]
            that_data.start_depth = list_start_depth[i]
            that_data.porosity_compression = this_e
            that_data.pressure_compression = this_P

            Pc_high_pressure.append(
                that_data.ConsolidationCurve(figures_output_folder +
                                             '先期固结压力\\'))
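            #(ConsolidationCurve presumably plots the e-log(p) curve, saves
            # the figure, and returns the preconsolidation pressure Pc in kPa,
            # or None when it cannot be determined)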

        Pc_high_pressure_sheet = []

        for j in range(len(index_valid)):

            if Pc_high_pressure[j] is not None:

                Pc_high_pressure_sheet.append(Pc_high_pressure[j])

        Pc_high_pressure_workbook += Pc_high_pressure_sheet

    fig, ax = plt.subplots(figsize=(8, 8))

    #for iteration
    list_Pc_workbook = [Pc_high_pressure_workbook]
    list_title = ['高压固结']

    for k in range(len(list_title)):

        #Pc, list title, folder name
        Pc_workbook = list_Pc_workbook[k]
        Pc_title = list_title[k]

        if Pc_title == '':

            continue

        group = np.linspace(min(Pc_workbook), max(Pc_workbook), 20)

        title_font = FontProperties(fname=r"C:\Windows\Fonts\simhei.ttf",
                                    size=16)
        label_font = FontProperties(fname=r"C:\Windows\Fonts\simhei.ttf",
                                    size=13)

        #plot histogram
        plt.hist(Pc_workbook, group, histtype='bar', rwidth=0.95)

        plt.title(Pc_title + '先期固结压力频数分布直方图\n样本总量:' +
                  str(int(len(Pc_workbook))),
                  fontproperties=title_font)

        plt.xlabel('先期固结压力(kPa)', fontproperties=label_font)

        #list of frequency
        frequency = [0] * (len(group) - 1)

        #manual histogram
        for this_valid_data in Pc_workbook:

            for g in range(len(group) - 1):

                if group[g] <= this_valid_data <= group[g + 1]:

                    frequency[g] += 1

                    break

        ax.yaxis.set_major_locator(
            MultipleLocator(
                int(np.ceil((max(frequency) - min(frequency)) / 20))))

        #set ticks
        plt.tick_params(labelsize=15)
        labels = ax.get_xticklabels() + ax.get_yticklabels()

        #label fonts
        for this_label in labels:

            this_label.set_fontname('Times New Roman')

        fig_path = figures_output_folder + '先期固结压力\\先期固结压力值分布.png'

        plt.savefig(fig_path, dpi=300, bbox_inches='tight')
        plt.close()
def MergedWorkbookStatistics(list_xls_path, num_head_rows, num_head_columns):

    print('')
    print('--Merged Workbook Statistics')

    plt.style.use('ggplot')

    #construct output folder path
    tables_output_folder = list_xls_path[0].split(
        'input')[0] + 'output\\颗分汇总\\统计\\'

    #construct output folder path
    figures_output_folder = list_xls_path[0].split(
        'input')[0] + 'output\\颗分汇总\\统计\\图\\总图\\'

    #generate output folder
    O_P.GenerateFolder(tables_output_folder)
    O_P.GenerateFolder(figures_output_folder)

    #DF channels
    total_channels = []

    for this_xls_path in list_xls_path:

        #open the excel sheet to be operated on
        #formatting_info: keep the header format
        workbook = xlrd.open_workbook(this_xls_path, formatting_info=True)

        #fetch all sheet names
        list_sheet_names = list(workbook.sheet_names())

        #traverse all sheets
        for this_sheet_name in list_sheet_names:

            #Data Frame object
            that_channel = pd.read_excel(this_xls_path,
                                         sheet_name=this_sheet_name)

            #collect it
            total_channels.append(that_channel)
    '''title and data throughout whole workbook'''
    list_title = []
    list_data = []
    list_unit = []

    #check if the repetition exists
    total_id = []

    #traverse all sheets
    for channel in total_channels:

        print('')
        print('...')
        print('......')
        print('')

        final_head_columns, unit_list = O_H_C.HeadColumnsGeneration(
            channel, num_head_rows)

        #print(final_head_columns)

        #all info of dataframe
        value_matrix = channel.values

        title_font = FontProperties(fname=r"C:\Windows\Fonts\simhei.ttf",
                                    size=16)
        label_font = FontProperties(fname=r"C:\Windows\Fonts\simhei.ttf",
                                    size=13)
        '''complete info of statistics'''

        #item names of statistics
        statistic_items = ['数据量', '最大值', '最小值', '平均值', '标准差', '变异系数', '标准值']

        #columns to delete
        columns_to_delete = []

        #no valid data
        columns_void = []

        #delete the repetition
        index_valid = O_L.ValidIndexList(value_matrix[num_head_rows:, 1])

        total_id += (list(value_matrix[num_head_rows:, 1]))

        print('-->Total Samples:', len(value_matrix[num_head_rows:, 1]))
        print('-->Valid Samples:', len(index_valid))

        for k in range(num_head_columns, np.shape(value_matrix)[1]):

            #num of steps
            n_step = 20

            #fetch the data
            data = list(value_matrix[num_head_rows:, k])

            #unit str
            unit = unit_list[k]

            #title str
            title = final_head_columns[k]

            #valid data
            list_data.append(O_L.CustomIndexList(data, index_valid))
            list_title.append(title)
            list_unit.append(unit)

    print('')
    print('...')
    print('......')
    print('Merged Workbook')
    print('-->Total Samples:', len(total_id))
    print('-->Valid Samples:', len(O_L.ValidIndexList(total_id)))
    print('')

    #maps between title and data, and between title and unit
    map_title_data = {}
    map_title_unit = {}

    for k in range(len(list_title)):

        this_title = list_title[k]
        this_data = list_data[k]

        #one unit per title (duplicate titles share a unit)
        map_title_unit[this_title] = list_unit[k]

        if this_title in list(map_title_data.keys()):

            map_title_data[this_title] += this_data

        else:

            map_title_data[this_title] = this_data

    #new matrix to contain statistic result
    statistic = np.zeros((len(map_title_data), len(statistic_items)))

    #traverse the dictionary
    for k in range(len(map_title_data)):

        title = list(map_title_data.keys())[k]
        data = list(map_title_data.values())[k]
        unit = '(' + map_title_unit[title] + ')'

        #expire particular conditions
        if '分类' in title or '备' in title or '注' in title:

            columns_to_delete.append(title)

            continue

        #expire nan
        valid_data = [
            float(this_data) for this_data in data
            if not np.isnan(float(this_data))
        ]

        print(k, title, unit)

        if valid_data == []:

            columns_void.append(title)

            continue

        #x coordinates
        group = np.linspace(min(valid_data), max(valid_data), n_step)

        #whether to process
        scaled_flag = False

        #detect scientific-notation ticks (e.g. 1e-05) and record the exponent
        for this_tick in group:

            if 'e' in str(this_tick):

                factor = str(min(group)).split('e')[-1]

                scaled_flag = True

                break
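
        #when scientific notation is detected, the data are rescaled by
        #10**factor below so the x-axis keeps plain tick labels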

        fig, ax = plt.subplots(figsize=(8, 8))

        if scaled_flag:

            #rescale by 10**factor
            valid_data = np.array(valid_data) / 10**(int(factor))

            group = np.linspace(min(valid_data), max(valid_data), n_step)

            #plot histogram
            ax.hist(valid_data, group, histtype='bar', rwidth=0.95)

            plt.title(title + ' 频数分布直方图\n样本总量:' + str(int(len(valid_data))),
                      fontproperties=title_font)

            plt.xlabel(title + ' e' + factor + ' ' + unit,
                       fontproperties=label_font)

        else:

            #plot histogram
            ax.hist(valid_data, group, histtype='bar', rwidth=0.95)

            plt.title(title + ' 频数分布直方图\n样本总量:' + str(int(len(valid_data))),
                      fontproperties=title_font)

            plt.xlabel(title + ' ' + unit, fontproperties=label_font)

        #list of frequency
        frequency = [0] * (len(group) - 1)

        #manual histogram
        for this_valid_data in valid_data:

            for g in range(len(group) - 1):

                if group[g] <= this_valid_data <= group[g + 1]:

                    frequency[g] += 1

                    break

        ax.yaxis.set_major_locator(
            MultipleLocator(
                int(np.ceil((max(frequency) - min(frequency)) / n_step))))

        #set ticks
        plt.tick_params(labelsize=15)
        labels = ax.get_xticklabels() + ax.get_yticklabels()

        #label fonts
        for this_label in labels:

            this_label.set_fontname('Times New Roman')

        #amount
        data_amount = len(valid_data)

        #maximum
        data_maximum = np.max(valid_data)

        #minimum
        data_minimum = np.min(valid_data)

        #average
        data_average = np.mean(valid_data)

        #standard deviation
        data_standard_deviation = C_F_V.StandardDeviation(valid_data)

        #variable coefficient
        data_variable_coefficient = C_F_V.VariableCoefficient(valid_data)

        #standard value
        data_standard_value = C_F_V.StandardValue(valid_data)
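        #(assumption: C_F_V.StandardValue applies the GB 50021 statistical
        # correction, standard value = γs * mean, with
        # γs = 1 ± (1.704/sqrt(n) + 4.678/n**2) * δ,
        # where δ is the variable coefficient and n the sample count)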

        #give the value
        statistic[k, 0] = round(data_amount, 3)
        statistic[k, 1] = round(data_maximum, 3)
        statistic[k, 2] = round(data_minimum, 3)
        statistic[k, 3] = round(data_average, 3)

        if statistic[k, 0] != 1:

            statistic[k, 4] = round(data_standard_deviation, 3)
            statistic[k, 5] = round(data_variable_coefficient, 3)
            statistic[k, 6] = round(data_standard_value, 3)

        #valid file name
        if '<' in title:

            title = title.replace('<', '小于')

        if '>' in title:

            title = title.replace('>', '大于')

        fig_path = figures_output_folder + title + '.png'

        #save the fig
        plt.savefig(fig_path, dpi=300, bbox_inches='tight')
        plt.close()

    #construct new workbook
    new_workbook = xlwt.Workbook(encoding='utf-8')

    #construct new sheet
    new_sheet = new_workbook.add_sheet("总表")

    #define the border style
    borders = xlwt.Borders()
    borders.left = 1
    borders.right = 1
    borders.top = 1
    borders.bottom = 1
    borders.bottom_colour = 0x3A

    style = xlwt.XFStyle()
    style.borders = borders

    #column offset: column 0 holds the row headers
    plus = 1

    #title
    new_sheet.write(0, 0, '特征值', style)

    #header
    for kk in range(len(map_title_data)):

        this_title = list(map_title_data.keys())[kk]

        new_sheet.write(kk + 1, 0, this_title, style)

    #items
    for kk in range(len(statistic_items)):

        this_item = statistic_items[kk]

        new_sheet.write(0, kk + plus, this_item, style)

    for i in range(len(statistic_items)):

        for j in range(len(map_title_data)):

            if statistic[j][i] == 0:

                new_sheet.write(j + 1, i + plus, '', style)

            else:

                try:

                    new_sheet.write(j + 1, i + plus, statistic[j][i], style)

                #fall back to a plain float
                except Exception:

                    new_sheet.write(j + 1, i + plus, float(statistic[j][i]),
                                    style)

    new_workbook.save(tables_output_folder + '统计总表.xls')
def SheetsStatistics(xls_path,
                     num_head_rows,
                     num_head_columns,
                     list_num_head_columns=None):

    print('')
    print('--Sheets Statistics')

    plt.style.use('ggplot')

    #open the excel sheet to be operated on
    #formatting_info: keep the header format
    workbook = xlrd.open_workbook(xls_path, formatting_info=True)

    #copy former workbook
    new_workbook = copy(workbook)

    #construct output folder path
    tables_output_folder = xls_path.replace('.xls', '').replace(
        'input', 'output') + '\\统计\\'

    #generate output folder
    O_P.GenerateFolder(tables_output_folder)

    #save as
    new_workbook.save(tables_output_folder + '统计结果.xls')

    #fetch all sheet names
    list_sheet_names = list(workbook.sheet_names())

    #default
    if list_num_head_columns is None:

        list_num_head_columns = [num_head_columns] * len(list_sheet_names)

    map_sheet_names_num_head_columns = dict(
        zip(list_sheet_names, list_num_head_columns))

    #traverse all sheets
    for this_sheet_name in workbook.sheet_names():

        print('')
        print('...')
        print('......')
        print('->sheet name:', this_sheet_name)
        print('')

        #construct output folder path
        figures_output_folder = xls_path.replace('.xls', '').replace(
            'input', 'output') + '\\统计\\图\\表 ' + this_sheet_name + '\\'

        #generate output folder
        O_P.GenerateFolder(figures_output_folder)

        #Data Frame object
        channel = pd.read_excel(xls_path, sheet_name=this_sheet_name)

        final_head_columns, unit_list = O_H_C.HeadColumnsGeneration(
            channel, num_head_rows)

        #print(final_head_columns)

        #all info of dataframe
        value_matrix = channel.values

        title_font = FontProperties(fname=r"C:\Windows\Fonts\simhei.ttf",
                                    size=16)
        label_font = FontProperties(fname=r"C:\Windows\Fonts\simhei.ttf",
                                    size=13)
        '''complete info of statistics'''

        #item names of statistics
        statistic_items = ['数据量', '最大值', '最小值', '平均值', '标准差', '变异系数', '标准值']

        #new dataframe to store statistic data
        statistic = cp.deepcopy(channel.iloc[:len(statistic_items)])

        #columns to delete
        columns_to_delete = []

        #no valid data
        columns_void = []

        #delete the repetition
        index_valid = O_L.ListWithoutRepetition(value_matrix[num_head_rows:,
                                                             1])

        print('-->Total Samples:', len(value_matrix[num_head_rows:, 1]))
        print('-->Valid Samples:', len(index_valid))

        for k in range(num_head_columns, np.shape(value_matrix)[1]):

            #num of steps
            n_step = 20

            #fetch the data
            data = O_L.CustomIndexList(list(value_matrix[num_head_rows:, k]),
                                       index_valid)

            #unit str
            unit = '(' + unit_list[k] + ')'

            #title str
            title = final_head_columns[k]

            #expire particular conditions
            if '分类' in title or '备' in title or '注' in title:

                #give the value
                statistic.iloc[0, k] = ''
                statistic.iloc[1, k] = ''
                statistic.iloc[2, k] = ''
                statistic.iloc[3, k] = ''
                statistic.iloc[4, k] = ''
                statistic.iloc[5, k] = ''
                statistic.iloc[6, k] = ''

                columns_to_delete.append(title)

                continue

            #expire nan
            valid_data = [
                float(this_data) for this_data in data
                if not np.isnan(float(this_data))
            ]

            print(k, title, unit)

            if valid_data == []:

                #give the value
                statistic.iloc[0, k] = ''
                statistic.iloc[1, k] = ''
                statistic.iloc[2, k] = ''
                statistic.iloc[3, k] = ''
                statistic.iloc[4, k] = ''
                statistic.iloc[5, k] = ''
                statistic.iloc[6, k] = ''

                columns_void.append(title)

                continue

            #x coordinates
            group = np.linspace(min(valid_data), max(valid_data), n_step)

            #whether to process
            scaled_flag = False

            #detect scientific-notation ticks (e.g. 1e-05) and record the exponent
            for this_tick in group:

                if 'e' in str(this_tick):

                    factor = str(min(group)).split('e')[-1]

                    scaled_flag = True

                    break
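
            #as above: rescale by 10**factor so tick labels stay readable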

            fig, ax = plt.subplots(figsize=(8, 8))

            if scaled_flag:

                #rescale by 10**factor
                valid_data = np.array(valid_data) / 10**(int(factor))

                group = np.linspace(min(valid_data), max(valid_data), n_step)

                #plot histogram
                plt.hist(valid_data, group, histtype='bar', rwidth=0.95)

                plt.title(title + ' 频数分布直方图\n样本总量:' +
                          str(int(len(valid_data))),
                          fontproperties=title_font)

                plt.xlabel(title + ' e' + factor + ' ' + unit,
                           fontproperties=label_font)

            else:

                #plot histogram
                plt.hist(valid_data, group, histtype='bar', rwidth=0.95)

                plt.title(title + ' 频数分布直方图\n样本总量:' +
                          str(int(len(valid_data))),
                          fontproperties=title_font)

                plt.xlabel(title + ' ' + unit, fontproperties=label_font)

            #list of frequency
            frequency = [0] * (len(group) - 1)

            #manual histogram
            for this_valid_data in valid_data:

                for g in range(len(group) - 1):

                    if group[g] <= this_valid_data <= group[g + 1]:

                        frequency[g] += 1

                        break

            ax.yaxis.set_major_locator(
                MultipleLocator(
                    int(np.ceil((max(frequency) - min(frequency)) / n_step))))

            #set ticks
            plt.tick_params(labelsize=15)
            labels = ax.get_xticklabels() + ax.get_yticklabels()

            #label fonts
            for this_label in labels:

                this_label.set_fontname('Times New Roman')

            #amount
            data_amount = len(valid_data)

            #maximum
            data_maximum = np.max(valid_data)

            #minimum
            data_minimum = np.min(valid_data)

            #average
            data_average = np.mean(valid_data)

            #standard deviation
            data_standard_deviation = C_F_V.StandardDeviation(valid_data)

            #variable coefficient
            data_variable_coefficient = C_F_V.VariableCoefficient(valid_data)

            #standard value
            data_standard_value = C_F_V.StandardValue(valid_data, '-')
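            #(assumption: the '-' argument selects the minus branch of the
            # statistical correction, i.e. the conservative lower bound)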

            #give the value
            statistic.iloc[0, k] = data_amount
            statistic.iloc[1, k] = data_maximum
            statistic.iloc[2, k] = data_minimum
            statistic.iloc[3, k] = data_average
            statistic.iloc[4, k] = data_standard_deviation
            statistic.iloc[5, k] = data_variable_coefficient
            statistic.iloc[6, k] = data_standard_value

            #valid file name
            if '<' in title:

                title = title.replace('<', '小于')

            if '>' in title:

                title = title.replace('>', '大于')

            fig_path = figures_output_folder + title + '.png'

            #save the fig
            plt.savefig(fig_path, dpi=300, bbox_inches='tight')
            plt.close()

        #statistics decoration
        for k in range(len(statistic_items)):

            statistic.iloc[k, 1] = statistic_items[k]

        #delete one column
        statistic = statistic.drop(statistic.columns[0], axis=1)

        #rename column
        statistic = statistic.rename(columns={statistic.columns[1]: '特征值'})

        #index of line where info starts
        start_info_row = num_head_rows + 1

        #open a sheet
        this_sheet = new_workbook.get_sheet(this_sheet_name)

        #total lines
        num_info_rows = len(this_sheet.rows)

        #blank row
        one_list = [''] * (len(channel.iloc[:1].columns) + 2)

        #define the border style
        borders = xlwt.Borders()
        borders.left = 1
        borders.right = 1
        borders.top = 1
        borders.bottom = 1
        borders.bottom_colour = 0x3A

        style = xlwt.XFStyle()
        style.borders = borders

        #fill with blank lines
        for ii in range(num_info_rows):

            for jj in range(len(one_list)):

                this_sheet.write(ii + start_info_row, jj, one_list[jj])
        '''Data frame reads data and automatically ignores empty rows and columns'''
        for i in range(statistic.shape[0]):

            for j in range(statistic.shape[1]):

                try:

                    this_sheet.write(
                        i + start_info_row,
                        j + map_sheet_names_num_head_columns[this_sheet_name],
                        statistic.iloc[i, j], style)

                #xlwt cannot write numpy integers; fall back to float
                except Exception:

                    this_sheet.write(
                        i + start_info_row,
                        j + map_sheet_names_num_head_columns[this_sheet_name],
                        float(statistic.iloc[i, j]), style)

        new_workbook.save(tables_output_folder + '统计结果.xls')
def WorkbookDiameter(xls_path, num_head_rows, num_head_columns):

    print('')
    print('--Workbook Diameter')

    #plt.style.use('ggplot')

    #construct output folder path
    output_folder = xls_path.replace('.xls', '').replace('input',
                                                         'output') + '\\粒径曲线\\'

    #generate output folder
    O_P.GenerateFolder(output_folder)

    #open the excel sheet to be operated on
    #formatting_info: keep the header format
    workbook = xlrd.open_workbook(xls_path, formatting_info=True)

    #fetch all sheet names
    list_sheet_names = list(workbook.sheet_names())

    #traverse all sheets except the last
    for this_sheet_name in list_sheet_names[:-1]:

        O_P.GenerateFolder(output_folder + this_sheet_name + '\\层划分集合\\')
        O_P.GenerateFolder(output_folder + this_sheet_name + '\\孔划分集合\\')
        O_P.GenerateFolder(output_folder + this_sheet_name + '\\层划分\\')
        O_P.GenerateFolder(output_folder + this_sheet_name + '\\孔划分\\')
        O_P.GenerateFolder(output_folder + this_sheet_name + '\\孔集合\\')
        O_P.GenerateFolder(output_folder + this_sheet_name + '\\孔\\')
        O_P.GenerateFolder(output_folder + this_sheet_name + '\\层\\')

        print('')
        print('...')
        print('......')
        print('-> sheet name:', this_sheet_name)
        print('')

        #Data Frame object
        channel = pd.read_excel(xls_path, sheet_name=this_sheet_name)

        final_head_columns, unit_list = O_H_C.HeadColumnsGeneration(
            channel, num_head_rows)

        #        print(final_head_columns)

        #all info of dataframe
        value_matrix = channel.values

        #        '''special condition'''
        #        num_head_rows-=1

        #delete the repetition
        index_valid = O_L.ValidIndexList(value_matrix[num_head_rows:, 1])

        index_diameter = []

        for k in range(num_head_columns, np.shape(value_matrix)[1]):

            #title str
            title = final_head_columns[k]

            if '颗' in title\
            and '粒' in title\
            and '分' in title\
            and '析' in title\
            and 'mm' in title:

                print(k, title)

                index_diameter.append(k)

        index_list = [0, 1, 2, 3]

        #indoor id, hole id, start depth, end depth
        list_indoor_id,\
        list_hole_id,\
        list_start_depth,\
        list_end_depth=[O_L.CustomIndexList(list(value_matrix[num_head_rows:,this_index]),index_valid) for this_index in index_list]

        #matrix to contain grain partition proportion
        layers_diameter = np.zeros((len(index_valid), len(index_diameter)))

        column = 0

        for this_index in index_diameter:

            layers_diameter[:, column] = O_L.CustomIndexList(
                list(value_matrix[num_head_rows:, this_index]), index_valid)

            column += 1

        #construct data objects
        list_layer = []

        for i in range(np.shape(layers_diameter)[0]):

            new_layer = data()

            new_layer.hole_id = list_hole_id[i]
            new_layer.indoor_id = list_indoor_id[i]
            new_layer.end_depth = list_end_depth[i]
            new_layer.start_depth = list_start_depth[i]

            new_layer.list_diameter = cp.deepcopy(diameter_range)
            new_layer.list_diameter_percentage = list(layers_diameter[i, :])
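            #(assumption: diameter_range is a module-level list of sieve
            # diameters in mm, ordered to match the 颗粒分析 columns found above)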

            list_bool = [
                np.isnan(this_percentage)
                for this_percentage in new_layer.list_diameter_percentage
            ]

            #skip layers where every percentage is nan
            if all(list_bool):

                continue

            #calculate the cumulative percentage
            new_layer.list_diameter_percentage_cumulative = []

            for s in range(len(new_layer.list_diameter_percentage)):

                this_cumulative_percentage = np.sum([
                    this_percentage for this_percentage in
                    new_layer.list_diameter_percentage[s:]
                    if not np.isnan(this_percentage)
                ])

                new_layer.list_diameter_percentage_cumulative.append(
                    this_cumulative_percentage)
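
            #each entry is the nan-skipping tail sum of the per-fraction
            #percentages from index s onward, i.e. the cumulative percentage
            #used for the grain-size distribution curve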

            list_layer.append(new_layer)

        #Generate hole list (once, after all layers are collected)
        list_hole = Layer2Hole(list_layer)

        #Generate range layer list
        list_range_layer = Data2RangeData(list_layer)

        #Generate range hole list
        list_range_hole = Data2RangeData(list_hole)

        #output the visualization
        for this_layer in list_layer:

            this_layer.DiameterCurve(output_folder + this_sheet_name + '\\层\\')

        for this_hole in list_hole:

            this_hole.DiameterCurve(output_folder + this_sheet_name + '\\孔\\')
            this_hole.DiameterCurveBatch(output_folder + this_sheet_name +
                                         '\\孔集合\\')

        for this_range_layer in list_range_layer:

            this_range_layer.DiameterCurve(output_folder + this_sheet_name +
                                           '\\层划分\\')
            this_range_layer.DiameterCurveBatch(output_folder +
                                                this_sheet_name + '\\层划分集合\\')

        for this_range_hole in list_range_hole:

            this_range_hole.DiameterCurve(output_folder + this_sheet_name +
                                          '\\孔划分\\')
            this_range_hole.DiameterCurveBatch(output_folder +
                                               this_sheet_name + '\\孔划分集合\\')
def WorkbookResilience(xls_path, num_head_rows, num_head_columns):

    print('')
    print('--Workbook Resilience')

    #plt.style.use('ggplot')

    #construct output folder path
    output_folder = xls_path.replace('.xls', '').replace(
        'input', 'output') + '\\先期固结压力\\回弹\\'

    #generate output folder
    O_P.GenerateFolder(output_folder)

    #open the excel sheet to be operated on
    #formatting_info: keep the header format
    workbook = xlrd.open_workbook(xls_path, formatting_info=True)

    #fetch all sheet names
    list_sheet_names = list(workbook.sheet_names())

    #construct new workbook
    new_workbook = xlwt.Workbook(encoding='utf-8')

    #construct new sheet
    new_sheet = new_workbook.add_sheet("总表")

    #define the border style
    borders = xlwt.Borders()
    borders.left = 1
    borders.right = 1
    borders.top = 1
    borders.bottom = 1
    borders.bottom_colour = 0x3A

    style = xlwt.XFStyle()
    style.borders = borders

    #traverse all sheets
    for this_sheet_name in list_sheet_names:

        print('')
        print('...')
        print('......')
        print('->sheet name:', this_sheet_name)
        print('')

        #Data Frame object
        channel = pd.read_excel(xls_path, sheet_name=this_sheet_name)

        final_head_columns, unit_list = O_H_C.HeadColumnsGeneration(
            channel, num_head_rows)

        #        print(final_head_columns)

        #all info of dataframe
        value_matrix = channel.values
        #special condition: this sheet's effective header is one row shorter
        #(note: the decrement persists across the sheet loop, so it compounds
        #when the workbook has more than one sheet)
        num_head_rows -= 1

        #delete the repetition
        index_valid = O_L.ValidIndexList(value_matrix[num_head_rows:, 1])

        #fetch the id of P and e
        index_settlement_compression = []
        index_settlement_resilience = []
        index_settlement_recompression = []

        #pressure
        pressure_compression = []
        pressure_resilience = []
        pressure_recompression = []

        for k in range(num_head_columns, np.shape(value_matrix)[1]):

            #title str
            title = final_head_columns[k]

            if 'PC' in title:

                print(k, title)

                index_pressure_consolidation = k

            if '压缩指数' in title:

                print(k, title)

                index_index_compression = k

            if '回弹指数' in title:

                print(k, title)

                index_index_resilience = k

            if '孔隙比' in title:

                print(k, title)

                index_porosity_original = k

            if '一定压力固结沉降量' in title:

                print(k, title)

                index_settlement_compression.append(k)

                pressure_compression.append(
                    float(title.strip().split(' ')[1].replace('kPa', '')))

            if '回弹固结沉降量' in title:

                print(k, title)

                index_settlement_resilience.append(k)
                pressure_resilience.append(float(title.strip().split(' ')[1]))

            if '再压缩固结沉降量' in title:

                if 'PC' in title or '指数' in title:

                    continue

                print(k, title)

                index_settlement_recompression.append(k)
                pressure_recompression.append(
                    float(title.strip().split(' ')[1]))
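            #the settlement headers above are assumed to carry the pressure as
            #their second whitespace-separated token,
            #e.g. '一定压力固结沉降量 200kPa' or '回弹固结沉降量 200'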

        index_list = [
            0, 1, 2, 3, index_index_compression, index_index_resilience,
            index_porosity_original, index_pressure_consolidation
        ]

        #indoor id, hole id, start depth, end depth,
        #compression index, resilience index, original porosity, consolidation pressure
        list_indoor_id,\
        list_hole_id,\
        list_start_depth,\
        list_end_depth,\
        list_index_compression,\
        list_index_resilience,\
        list_porosity_original,\
        list_pressure_consolidation=[O_L.CustomIndexList(list(value_matrix[num_head_rows:,this_index]),index_valid)
                                     for this_index in index_list]

        #settlement volume
        list_index = [
            index_settlement_compression, index_settlement_resilience,
            index_settlement_recompression
        ]

        list_data = []

        for this_index_list in list_index:

            #matrix of settlement readings:
            #rows are valid samples, columns are pressure stages
            this_data = np.zeros((len(index_valid), len(this_index_list)))

            for column, this_index in enumerate(this_index_list):

                this_data[:, column] = O_L.CustomIndexList(
                    list(value_matrix[num_head_rows:, this_index]),
                    index_valid)

            list_data.append(this_data)

        data_settlement_compression,\
        data_settlement_resilience,\
        data_settlement_recompression=list_data

        #construct data object
        for i in range(len(index_valid)):

            that_data = data()

            that_data.hole_id = list_hole_id[i]
            that_data.indoor_id = list_indoor_id[i]
            that_data.end_depth = list_end_depth[i]
            that_data.start_depth = list_start_depth[i]
            that_data.porosity_original = list_porosity_original[i]
            that_data.pressure_consolidation = list_pressure_consolidation[i]
            that_data.index_compression = list_index_compression[i]
            that_data.index_resilience = list_index_resilience[i]

            print('')
            print('...')
            print('......')
            print('Hole ID:', that_data.hole_id)
            #calculate a and e of compression
            that_data.pressure_compression = pressure_compression
            that_data.settlement_compression = data_settlement_compression[i]

            #difference of s and p
            diff_p=np.array(that_data.pressure_compression[1:])\
                  -np.array(that_data.pressure_compression[:-1])

            diff_s=np.array(that_data.settlement_compression[1:])\
                  -np.array(that_data.settlement_compression[:-1])

            #first value
            s_0 = that_data.settlement_compression[0]
            p_0 = that_data.pressure_compression[0]
            e_0 = that_data.porosity_original
            #unit of the compression coefficient a is 1/MPa
            a_0 = (s_0 / p_0) * 1000 / 20 * (1 + e_0)
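            #derivation: from e = e0 - (1+e0)*ΔH/20 (20 mm specimen height),
            #Δe = (1+e0)*Δs/20, and a = Δe/Δp; the factor 1000 converts Δp
            #from kPa to MPa, giving a in 1/MPa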

            list_a = [a_0] + list((diff_s / diff_p) * 1000 / 20 * (1 + e_0))
            list_diff_p = [p_0] + list(diff_p.ravel())

            #porosity list
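            #each stage: e_{j+1} = e_j - a_j * Δp_j / 1000
            #(Δp in kPa, a in 1/MPa, so a*Δp/1000 is the void ratio drop Δe)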
            list_e = [e_0]

            for j in range(len(list_a)):

                e_next = list_e[j] - list_a[j] * list_diff_p[j] / 1000

                #                        print('...')
                #                        print('last e:',list_e[j])
                #                        print('a:',list_a[j])
                #                        print('diff p:',list_diff_p[j])
                #                        print('next e:',e_next)
                #
                list_e.append(e_next)

            that_data.coefficient_compression = list_a
            that_data.porosity_compression = list_e

            #            print(that_data.pressure_compression)
            #            print(that_data.porosity_compression)

            #compression modulus calculation
            a = that_data.coefficient_compression[
                that_data.pressure_compression.index(200)]

            that_data.modulus_compression = (1 + e_0) / a
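            #Es = (1 + e0) / a, with a taken from the stage ending at 200 kPa
            #(printed below as Es1-2, i.e. the 100-200 kPa interval)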
            """e=e0-(1+e0)ΔH/20"""
            '''calculate a and e of resilience'''
            that_data.pressure_resilience = [800] + pressure_resilience
            that_data.settlement_resilience = data_settlement_resilience[i]

            that_data.porosity_resilience = list(
                e_0 -
                (1 + e_0) * np.array(that_data.settlement_resilience) / 20)

            #head: prepend the porosity at 800 kPa from the compression curve
            #(+1 skips the leading e_0 entry in porosity_compression)
            e_800 = that_data.porosity_compression[
                that_data.pressure_compression.index(800) + 1]

            that_data.porosity_resilience.insert(0, e_800)

            #            print(that_data.pressure_resilience)
            #            print(that_data.porosity_resilience)

            #resilience modulus calculation
            #pressure_resilience carries a prepended 800 kPa entry that
            #settlement_resilience lacks, hence the -1 offset
            s_100 = that_data.settlement_resilience[
                that_data.pressure_resilience.index(100) - 1]
            s_200 = that_data.settlement_resilience[
                that_data.pressure_resilience.index(200) - 1]

            that_data.modulus_resilience = (200 - 100) / (s_200 -
                                                          s_100) / 1000 * 20
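            #Eo = Δp * h0 / Δs = (200-100) kPa * 20 mm / Δs mm; /1000 -> MPa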
            #calculate a and e of recompression
            that_data.pressure_recompression = [50] + pressure_recompression + [1600]
            that_data.settlement_recompression = data_settlement_recompression[i]

            that_data.porosity_recompression = list(
                e_0 -
                (1 + e_0) * np.array(that_data.settlement_recompression) / 20)

            #head: add an element whose pressure is 50
            e_50 = that_data.porosity_resilience[-1]

            that_data.porosity_recompression.insert(0, e_50)

            #tail: add an element whose pressure is 1600
            e_1600 = that_data.porosity_compression[
                that_data.pressure_compression.index(1600) + 1]

            that_data.porosity_recompression.append(e_1600)

            #            print(that_data.pressure_recompression)
            #            print(that_data.porosity_recompression)

            print('Pc: %dkPa' % (that_data.pressure_consolidation))
            print('Cc: %.3f' % (that_data.index_compression))
            print('Cs: %.3f' % (that_data.index_resilience))
            print('Es1-2: %.2fMPa' % (that_data.modulus_compression))
            print('Eo2-1: %.2fMPa' % (that_data.modulus_resilience))
            #write all data of this sample to the summary sheet
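            #each sample occupies a 7-row block starting at row i*7+1:
            #row 1 ids and depths, row 2 indices and moduli,
            #rows 3-6 the P / ΔH / e / a series
            #(note: i restarts for every sheet, so with more than one sheet
            #later sheets would overwrite these rows in 总表)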
            new_sheet.write(i * 7 + 1, 0, '室内编号', style)
            new_sheet.write(i * 7 + 1, 1, that_data.indoor_id, style)
            new_sheet.write(i * 7 + 1, 2, '野外编号', style)
            new_sheet.write(i * 7 + 1, 3, that_data.hole_id, style)
            new_sheet.write(i * 7 + 1, 4, '起始深度', style)
            new_sheet.write(i * 7 + 1, 5,
                            str(that_data.start_depth) + 'm', style)
            new_sheet.write(i * 7 + 1, 6, '终止深度', style)
            new_sheet.write(i * 7 + 1, 7,
                            str(that_data.end_depth) + 'm', style)

            new_sheet.write(i * 7 + 2, 0, '先期固结压力', style)
            new_sheet.write(i * 7 + 2, 1,
                            'Pc=%dkPa' % (that_data.pressure_consolidation),
                            style)
            new_sheet.write(i * 7 + 2, 2, '压缩指数', style)
            new_sheet.write(i * 7 + 2, 3,
                            'Cc=%.3f' % (that_data.index_compression), style)
            new_sheet.write(i * 7 + 2, 4, '回弹指数', style)
            new_sheet.write(i * 7 + 2, 5,
                            'Cs=%.3f' % (that_data.index_resilience), style)
            new_sheet.write(i * 7 + 2, 6, '压缩模量', style)
            new_sheet.write(i * 7 + 2, 7,
                            'Es1-2=%.2fMPa' % (that_data.modulus_compression),
                            style)
            new_sheet.write(i * 7 + 2, 8, '回弹模量', style)
            new_sheet.write(i * 7 + 2, 9,
                            'Eo2-1=%.2fMPa' % (that_data.modulus_resilience),
                            style)

            new_sheet.write(i * 7 + 3, 0, 'P (kPa)', style)
            new_sheet.write(i * 7 + 3, 1, '0', style)

            for j in range(len(that_data.pressure_compression)):

                new_sheet.write(i * 7 + 3, j + 2,
                                '%d' % (that_data.pressure_compression[j]),
                                style)

            new_sheet.write(i * 7 + 4, 0, 'ΔH (mm)', style)
            new_sheet.write(i * 7 + 4, 1, '', style)

            for j in range(len(that_data.settlement_compression)):

                new_sheet.write(i * 7 + 4, j + 2,
                                '%.3f' % (that_data.settlement_compression[j]),
                                style)

            new_sheet.write(i * 7 + 5, 0, 'e', style)

            for j in range(len(that_data.porosity_compression)):

                new_sheet.write(i * 7 + 5, j + 1,
                                '%.3f' % (that_data.porosity_compression[j]),
                                style)

            new_sheet.write(i * 7 + 6, 0, 'a (1/MPa)', style)
            new_sheet.write(i * 7 + 6, 1, '', style)

            for j in range(len(that_data.coefficient_compression)):

                new_sheet.write(
                    i * 7 + 6, j + 2,
                    '%.3f' % (that_data.coefficient_compression[j]), style)

            that_data.ResilienceCurve(output_folder)

    new_workbook.save(output_folder + '数据输出.xls')
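
#a minimal usage sketch (hypothetical file name and header sizes; adjust to
#the actual workbook layout before running)
if __name__ == '__main__':

    WorkbookResilience('input\\回弹.xls', num_head_rows=3, num_head_columns=4)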