def WorkbookClassification(xls_path,num_head_rows,num_head_columns):
    
    print('')
    print('--Workbook Classification')
    
    plt.style.use('ggplot')
    
    #open the excel sheet to be operated on
    #formatting_info: keep the header format
    workbook=xlrd.open_workbook(xls_path,formatting_info=True)
    
    #construct output folder path
    tables_output_folder=xls_path.replace('.xls','').replace('input','output')+'\\分类\\'
    figures_output_folder=xls_path.replace('.xls','').replace('input','output')+'\\分类\\图\\总图\\'
    
    #generate output folder
    O_P.GenerateFolder(figures_output_folder)
  
    #construct map between sheet names and head rows
    list_sheet_names=list(workbook.sheet_names())
         
    title_list=['粉土密实度分类',
                '粉土湿度分类',
                '黏性土状态分类',
                '土的分类',
                '备注']
    
    #classification result list
    classification_ω0=[]
    classification_e0=[]
    classification_IL=[]
    classification_GB=[]
    classification_note=[]
    
    #classification result list
    classification_ω0,classification_e0,classification_IL=[],[],[]
    
    #traverse all sheets
    for this_sheet_name in list_sheet_names[-1:]:
            
        print('')
        print('...')
        print('......')
        print('->sheet name:',this_sheet_name)
        
        #Data Frame object
        channel=pd.read_excel(xls_path,sheet_name=this_sheet_name)
        
        final_head_columns,unit_list=O_H_C.HeadColumnsGeneration(channel,num_head_rows)
        
        #all info of dataframe
        value_matrix=channel.values
        
        #delete the repetition
        index_valid=O_L.ValidIndexList(value_matrix[num_head_rows:,1])
        
        print('-->Valid Samples:',len(index_valid))
        
        for k in range(len(final_head_columns)):
            
            this_head=final_head_columns[k]
            
            #search for note and make statistics
            if '备' in this_head or '注' in this_head:
                
                list_note=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_note=this_head
                
                print('-->head:'+head_note)
                
            #search for type of silt
            if '分类' in this_head:
                
                list_GB=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_GB=this_head
                
                print('-->head:'+head_GB)
                
            #search for pore ratio
            if 'e0' in this_head:
     
                list_e0=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_e0=this_head
                
                print('-->head:'+head_e0)
             
            #search for moisture content
            if 'ω0' in this_head:
                
                list_ω0=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_ω0=this_head 
                
                print('-->head:'+head_ω0)
                
            #search for liquidity index
            if 'IL' in this_head:
                
                list_IL=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_IL=this_head
                
                print('-->head:'+head_IL)

        #filter floury soil
        index_floury_soil=O_L.GBIndexFlourySoil(list_GB)
        
        ω0_valid=O_L.CustomIndexList(list_ω0,index_floury_soil)
        e0_valid=O_L.CustomIndexList(list_e0,index_floury_soil)

        #filter cohesive silt
        index_cohesive_silt=O_L.GBIndexCohesiveSilt(list_GB)
        
        IL_valid=O_L.CustomIndexList(list_IL,index_cohesive_silt)
     
        #list of classification result
        #floury soil
        classification_ω0+=SiltMoistureClassification(ω0_valid,num_head_rows)
        classification_e0+=SiltCompactnessClassification(e0_valid,num_head_rows)
        
        #cohesive silt
        classification_IL+=ClayeySiltStateClassification(IL_valid,num_head_rows)
        
        #GB
        classification_GB+=list_GB
        
        #note
        classification_note+=list_note
        
    #collect them into list
    classification_list=[classification_e0,
                         classification_ω0,
                         classification_IL,
                         classification_GB,
                         classification_note]
    #frequency to save
    list_frequency_map=[List2FrequencyMap(classification_list[ix]) for ix in range(len(title_list))]
    
    #construct new workbook   
    new_workbook=xlwt.Workbook(encoding='utf-8') 
    
    #construct new sheet
    new_sheet=new_workbook.add_sheet("总表")          
          
    #define the border style
    borders = xlwt.Borders()
    borders.left = 1
    borders.right = 1
    borders.top = 1
    borders.bottom = 1
    borders.bottom_colour=0x3A    
     
    style = xlwt.XFStyle()
    style.borders = borders
    
    #instant row
    row=0
    
    #title
    for k in range(len(title_list)):
        
        new_sheet.write(row,0,title_list[k],style)
        
        row+=1
        
        new_sheet.write(row,0,'总量',style)
        new_sheet.write(row,1,len(classification_list[k]),style)
        
        row+=1
        
#        print(list_frequency_map[k])
        
        for kk in range(len(list_frequency_map[k])):
            
            if isinstance(list(list_frequency_map[k].keys())[kk],str):
                
                new_sheet.write(row,0,list(list_frequency_map[k].keys())[kk],style)
                
            else:
                
                new_sheet.write(row,0,'其他',style)
                
            new_sheet.write(row,1,list(list_frequency_map[k].values())[kk],style)
            
            row+=1
            
        row+=1
            
    new_workbook.save(tables_output_folder+'统计总表.xls')
    
    #delete blank list
    real_title_list=O_L.CustomIndexList(title_list,O_L.DeleteBlankList(classification_list))
    real_classification_list=O_L.CustomIndexList(classification_list,O_L.DeleteBlankList(classification_list))
    
    #delete nan in classification list
    new_classification_list=[]
    
    for this_classification in real_classification_list:
        
        new_classification=[]
    
        for item in this_classification:
            
            if not isinstance(item,str):
                
                if np.isnan(item):
                    
#                    print('nan')
                    
                    continue
                
            new_classification.append(item)
            
        new_classification_list.append(new_classification)
        
    #construct a map between title and classification result
    map_title_classification=dict(zip(real_title_list,new_classification_list))
    
    #statistics result tables of classification
    TitleAndClassification2Table(map_title_classification,tables_output_folder)
    
    #statistics result figures of classification
    ClassificationStatistics(map_title_classification,figures_output_folder)
def MergedWorkbookClassification(list_xls_path,num_head_rows,num_head_columns):
    
    print('')
    print('--Merged Workbook Classification')
    
    plt.style.use('ggplot')
    
    #construct output folder path
    tables_output_folder=list_xls_path[0].split('input')[0]+'output\\颗分汇总\\分类\\'
    figures_output_folder=list_xls_path[0].split('input')[0]+'output\\颗分汇总\\分类\\图\\总图\\'
        
    #generate output folder
    O_P.GenerateFolder(tables_output_folder)
    O_P.GenerateFolder(figures_output_folder)
    
    #DF channels
    total_channels=[]
    
    for this_xls_path in list_xls_path:
        
        #open the excel sheet to be operated on
        #formatting_info: keep the header format
        workbook=xlrd.open_workbook(this_xls_path,formatting_info=True)
        
        #construct map between sheet names and head rows
        list_sheet_names=list(workbook.sheet_names())
     
        #traverse all sheets
        for this_sheet_name in list_sheet_names:
            
            #Data Frame object
            that_channel=pd.read_excel(this_xls_path,sheet_name=this_sheet_name)
            
            #collect it
            total_channels.append(that_channel)
     
    title_list=['粉土密实度分类',
                '粉土湿度分类',
                '黏性土状态分类',
                '土的分类',
                '备注',
                '砂类土分类(代号)',
                '砾类土分类(代号)',
                '砂类土分类(名称)',
                '砾类土分类(名称)']
    
    #classification result list
    classification_ω0=[]
    classification_e0=[]
    classification_IL=[]
    classification_GB=[]
    classification_note=[]
    classification_S_type=[]
    classification_G_type=[]
    classification_S_code=[]
    classification_G_code=[]
    
    #traverse all sheets
    for channel in total_channels:
    
        print('')
        print('...')
        print('......')
        print('')

        final_head_columns,unit_list=O_H_C.HeadColumnsGeneration(channel,num_head_rows)
        
        #all info of dataframe
        value_matrix=channel.values
        
        #delete the repetition
        index_valid=O_L.ValidIndexList(value_matrix[num_head_rows:,1])
        
        print('-->Valid Samples:',len(index_valid))
        
        for k in range(len(final_head_columns)):
            
            this_head=final_head_columns[k]
            
            #search for note and make statistics
            if '备' in this_head or '注' in this_head:
                
                list_note=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_note=this_head
                
                print('-->head:'+head_note)
                
            #search for type of silt
            if '分类' in this_head:
                
                list_GB=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_GB=this_head
                
                print('-->head:'+head_GB)
                
            #search for pore ratio
            if 'e0' in this_head:
     
                list_e0=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_e0=this_head
                
                print('-->head:'+head_e0)
             
            #search for moisture content
            if 'ω0' in this_head:
                
                list_ω0=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_ω0=this_head 
                
                print('-->head:'+head_ω0)
                
            #search for liquidity index
            if 'IL' in this_head:
                
                list_IL=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_IL=this_head
                
                print('-->head:'+head_IL)
        
        #delete the repetition and remove label R
        index_valid=O_L.ListWithR(value_matrix[num_head_rows:,1])
     
        print('-->Total Samples:',len(value_matrix[num_head_rows:,1]))
        print('-->Valid Samples:',len(index_valid))
        
        #partition index list
        list_partition_index=[]
        
        for k in range(num_head_columns,np.shape(value_matrix)[1]):
    
            #title str
            title=final_head_columns[k]
            
    #        print(k,title)
        
            if '颗' and '粒' and '分' and '析' in title:
                
                print('-->',title)
                     
                list_partition_index.append(k)
            
            if '不' and '均' and '匀' in title:
                
                print('-->',title)
                
                data_Cu=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                
            if '曲' and '率' in title:
                
                print('-->',title)
                
                data_Ce=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                
            if '分' and '类' in title:
                
                print('-->',title)
                
                data_GB=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
            
    #    print(list_partition_index)
        
        #for partition
        index_partition=O_L.GBIndexPartition(data_GB)
         
        #matrix to contain grain partition proportion
        data_partition=np.zeros((len(index_partition),len(list_partition_index)))
        
        column=0
        
        for this_index in list_partition_index:
            
            data_partition[:,column]=O_L.CustomIndexList(list(value_matrix[num_head_rows:,this_index]),index_partition)
        
            column+=1
        
        #valid part
        GB_partition=O_L.CustomIndexList(data_GB,index_partition)
        Cu_partition=O_L.CustomIndexList(data_Cu,index_partition)
        Ce_partition=O_L.CustomIndexList(data_Ce,index_partition)
        
    #        len(index_valid)
        
        #classificaiotn result
        S_classification_type=[]
        G_classification_type=[]
        S_classification_code=[]
        G_classification_code=[]
        
        for kk in range(len(index_partition)):
                  
            #construct new object
            this_grain=C_F_V.grain()
            
            this_grain.silt_type=GB_partition[kk]
            this_grain.InitMap(list(data_partition[kk,:]))   
            
            this_grain.Partition()
            this_grain.Classification(Cu_partition[kk],Ce_partition[kk])
            
            if '砂' in this_grain.silt_type:
                
                S_classification_type.append(this_grain.classification_type)
                S_classification_code.append(this_grain.classification_code)
                
            if '砾' in this_grain.silt_type:
                
                G_classification_type.append(this_grain.classification_type)
                G_classification_code.append(this_grain.classification_code)
            
        #filter floury soil
        index_floury_soil=O_L.GBIndexFlourySoil(list_GB)

        ω0_valid=O_L.CustomIndexList(list_ω0,index_floury_soil)
        e0_valid=O_L.CustomIndexList(list_e0,index_floury_soil)

        #filter cohesive silt
        index_cohesive_silt=O_L.GBIndexCohesiveSilt(list_GB)

        IL_valid=O_L.CustomIndexList(list_IL,index_cohesive_silt)
     
        #list of classification result
        #floury soil
        classification_ω0+=SiltMoistureClassification(ω0_valid,num_head_rows)
        classification_e0+=SiltCompactnessClassification(e0_valid,num_head_rows)
        
        #cohesive silt
        classification_IL+=ClayeySiltStateClassification(IL_valid,num_head_rows)
        
        #GB
        classification_GB+=list_GB
        
        #note
        classification_note+=list_note
        
#        print(len(classification_GB),len(classification_note))
        
        #grain partition result
        classification_S_type+=S_classification_type
        classification_G_type+=G_classification_type
        classification_S_code+=S_classification_code
        classification_G_code+=G_classification_code
        
    #collect them into list
    classification_list=[classification_e0,
                         classification_ω0,
                         classification_IL,
                         classification_GB,
                         classification_note,
                         classification_S_type,
                         classification_G_type,
                         classification_S_code,
                         classification_G_code]
        
    #delete blank list
    real_title_list=O_L.CustomIndexList(title_list,O_L.DeleteBlankList(classification_list))
    real_classification_list=O_L.CustomIndexList(classification_list,O_L.DeleteBlankList(classification_list))
    
    #delete nan in classification list
    new_classification_list=[]
    
    for this_classification in real_classification_list:
        
        new_classification=[]
    
        for item in this_classification:
            
            if not isinstance(item,str):
                
                if np.isnan(item):
                    
#                    print('nan')
                    
                    continue
                
            new_classification.append(item)
            
        new_classification_list.append(new_classification)
        
    #construct a map between title and classification result
    map_title_classification=dict(zip(real_title_list,new_classification_list))
    
    #statistics result tables of classification
    TitleAndClassification2Table(map_title_classification,tables_output_folder)
    
    #statistics result figures of classification
    ClassificationStatistics(map_title_classification,figures_output_folder)
def SheetsClassification(xls_path,num_head_rows,num_head_columns,list_num_head_columns=None):
    
    print('')
    print('--Sheets Classification')
    
    plt.style.use('ggplot')
    
    #open the excel sheet to be operated on
    #formatting_info: keep the header format
    workbook=xlrd.open_workbook(xls_path,formatting_info=True)
    
    #copy former workbook
    new_workbook=copy(workbook)
        
    #construct output folder path
    tables_output_folder=xls_path.replace('.xls','').replace('input','output')+'\\分类\\'
    
    #generate output folder
    O_P.GenerateFolder(tables_output_folder)
    
    #save as
    new_workbook.save(tables_output_folder+'分类结果.xls')
    
    #construct map between sheet names and head rows
    list_sheet_names=list(workbook.sheet_names())
    
    #default
    if list_num_head_columns==None:
        
        list_num_head_columns=[num_head_columns]*len(list_sheet_names)
        
    map_sheet_names_num_head_columns=dict(zip(list_sheet_names,list_num_head_columns))    
    
    title_list=['粉土密实度分类',
                '粉土湿度分类',
                '黏性土状态分类',
                '土的分类',
                '备注']
    
    #traverse all sheets
    for this_sheet_name in workbook.sheet_names():
    
        #open a sheet
        this_sheet=new_workbook.get_sheet(this_sheet_name) 
        
        print('')
        print('...')
        print('......')
        print('->sheet name:',this_sheet_name)
        
        #construct output folder path
        figures_output_folder=xls_path.replace('.xls','').replace('input','output')+'\\分类\\图\\表 '+this_sheet_name+'\\'
        
        #generate output folder
        O_P.GenerateFolder(figures_output_folder)
        O_P.GenerateFolder(tables_output_folder)
        
        #Data Frame object
        channel=pd.read_excel(xls_path,sheet_name=this_sheet_name)
        
        final_head_columns,unit_list=O_H_C.HeadColumnsGeneration(channel,num_head_rows)
        
        #all info of dataframe
        value_matrix=channel.values
        
        #delete the repetition
        index_valid=O_L.ValidIndexList(value_matrix[num_head_rows:,1])
        
        #index of line where info starts
        start_info_row=num_head_rows+1   
             
        for k in range(len(final_head_columns)):
            
            this_head=final_head_columns[k]
            
            #search for note and make statistics
            if '备' in this_head or '注' in this_head:
                
                list_note=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_note=this_head
                
                print('-->head:'+head_note)
                
            #search for type of silt
            if '分类' in this_head:
                
                list_GB=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_GB=this_head
                
                print('-->head:'+head_GB)
                
            #search for pore ratio
            if 'e0' in this_head:
     
                list_e0=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_e0=this_head
                
                print('-->head:'+head_e0)
             
            #search for moisture content
            if 'ω0' in this_head:
                
                list_ω0=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_ω0=this_head 
                
                print('-->head:'+head_ω0)
                
            #search for liquidity index
            if 'IL' in this_head:
                
                list_IL=O_L.CustomIndexList(list(value_matrix[num_head_rows:,k]),index_valid)
                head_IL=this_head
                
                print('-->head:'+head_IL)
                
        #list of classification result
        classification_ω0=SiltMoistureClassification(list_ω0,num_head_rows)
        classification_e0=SiltCompactnessClassification(list_e0,num_head_rows)
        classification_IL=ClayeySiltStateClassification(list_IL,num_head_rows)
        classification_GB=cp.deepcopy(list_GB)
        classification_note=cp.deepcopy(list_note)
        
        #collect them into list
        classification_list=[classification_e0,
                             classification_ω0,
                             classification_IL,
                             classification_GB,
                             classification_note]
        #frequency to save
        list_frequency_map=[List2FrequencyMap(classification_list[ix]) for ix in range(len(title_list))]
        
        #construct new workbook   
        new_workbook=xlwt.Workbook(encoding='utf-8') 
        
        #construct new sheet
        new_sheet=new_workbook.add_sheet("总表")          
              
        #define the border style
        borders = xlwt.Borders()
        borders.left = 1
        borders.right = 1
        borders.top = 1
        borders.bottom = 1
        borders.bottom_colour=0x3A    
         
        style = xlwt.XFStyle()
        style.borders = borders
        
        #instant row
        row=0
        
        #title
        for k in range(len(title_list)):
            
            new_sheet.write(row,0,title_list[k],style)
            
            row+=1
            
            new_sheet.write(row,0,'总量',style)
            new_sheet.write(row,1,len(classification_list[k]),style)
            
            row+=1
            
#            print(list_frequency_map[k])
            
            for kk in range(len(list_frequency_map[k])):
                
                if isinstance(list(list_frequency_map[k].keys())[kk],str):
                    
                    new_sheet.write(row,0,list(list_frequency_map[k].keys())[kk],style)
                    
                else:
                    
                    new_sheet.write(row,0,'其他',style)
                    
                new_sheet.write(row,1,list(list_frequency_map[k].values())[kk],style)
                
                row+=1
                
            row+=1
                
        new_workbook.save(tables_output_folder+'统计总表.xls')
    
        #plus columns
        num_columns_plus=map_sheet_names_num_head_columns[this_sheet_name]-num_head_columns
        
        #write table head
        for this_title in title_list:
            
            num_columns_plus+=1
            
            this_sheet.write(num_head_rows,
                             np.shape(channel.values)[1]+num_columns_plus,
                             this_title,
                             style)
            
        #plus columns   
        num_columns_plus=map_sheet_names_num_head_columns[this_sheet_name]-num_head_columns  
        
        #write classification result    
        for this_classification in classification_list:
            
            num_columns_plus+=1
            
            for i in range(len(this_classification)):
                  
                this_sheet.write(i+start_info_row,
                                 np.shape(channel.values)[1]+num_columns_plus,
                                 this_classification[i],
                                 style)      
    
        #save as
        new_workbook.save(tables_output_folder+'分类结果.xls')
        
        #delete blank list
        real_title_list=O_L.CustomIndexList(title_list,O_L.DeleteBlankList(classification_list))
        real_classification_list=O_L.CustomIndexList(classification_list,O_L.DeleteBlankList(classification_list))
        
        #delete nan in classification list
        new_classification_list=[]
        
        for this_classification in real_classification_list:
            
            new_classification=[]
        
            for item in this_classification:
                
                if not isinstance(item,str):
                    
                    if np.isnan(item):
                        
    #                    print('nan')
                        
                        continue
                    
                new_classification.append(item)
                
            new_classification_list.append(new_classification)
            
        #construct a map between title and classification result
        map_title_classification=dict(zip(real_title_list,new_classification_list))
        
        #statistics result tables of classification
        TitleAndClassification2Table(map_title_classification,tables_output_folder)
        
        #statistics result figures of classification
        ClassificationStatistics(map_title_classification,figures_output_folder)