def _risk_assessment_helper(self, experiment_class, exp_path, debug=False, other=None):
    dataset_getter = DatasetGetter(None)

    best_config = self.model_selector.model_selection(dataset_getter, experiment_class,
                                                      exp_path, self.model_configs,
                                                      debug, other)

    # Retrain with the best configuration and test
    experiment = experiment_class(best_config['config'], exp_path)

    # Set up a log file for this experiment (I am in a forked process)
    logger = Logger(str(os.path.join(experiment.exp_path, 'experiment.log')), mode='a')

    dataset_getter.set_inner_k(None)

    training_scores, test_scores = [], []

    # Mitigate bad random initializations
    for i in range(3):
        training_score, test_score = experiment.run_test(dataset_getter, logger, other)
        print(f'Final training run {i + 1}: {training_score}, {test_score}')
        training_scores.append(training_score)
        test_scores.append(test_score)

    training_score = sum(training_scores) / 3
    test_score = sum(test_scores) / 3

    logger.log('TR score: ' + str(training_score) + ' TS score: ' + str(test_score))

    with open(os.path.join(self._HOLDOUT_FOLDER, self._ASSESSMENT_FILENAME), 'w') as fp:
        json.dump({'best_config': best_config, 'HOLDOUT_TR': training_score,
                   'HOLDOUT_TS': test_score}, fp)
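
# A minimal standalone sketch (not part of the source) of the "average over
# multiple final runs" pattern used above; `run_test` here is an assumed
# zero-argument callable returning (training_score, test_score).
def average_final_runs(run_test, n_runs=3):
    training_scores, test_scores = [], []
    for _ in range(n_runs):
        tr, ts = run_test()
        training_scores.append(tr)
        test_scores.append(ts)
    return sum(training_scores) / n_runs, sum(test_scores) / n_runs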
def notify(abnormal, hostname, ip_address, options, subject):
    log = Logger().get_logger()

    if "mail" in options:
        ps_names = "<br>".join(abnormal)
        mail = Mail()
        mail.send_mail("<>", get_emails(), [], "[" + ip_address + "] " + subject,
                       ps_names, None)
        log.info("[mail] %s %s %s %s" % (get_emails(), ip_address, subject, ps_names))

    if "syslog" in options:
        ps_names = ",".join(abnormal)
        message = '%shostname=%s\tprocess=%s\t' % (make_header(ip_address), hostname, ps_names)
        log.info('[syslog] %shostname=%s\tprocess=%s\t' % (make_header(ip_address),
                                                           hostname, ps_names))
        send_qradar(message)

    if "db" in options:
        insert_db = importlib.import_module("insert_db")
        ps_names = ",".join(abnormal)
        message = 'hostname=%s\tip=%s\tprocess=%s\t' % (hostname, ip_address, ps_names)
        log.info('[db] hostname=%s\tip=%s\tprocess=%s\t' % (hostname, ip_address, ps_names))
        insert_db.insert_db(message)
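
# Illustrative call of notify() with hypothetical values: alert via mail and
# syslog that two abnormal processes were found on a host.
# notify(["sshd", "crond"], "web01", "10.0.0.5", ["mail", "syslog"],
#        "abnormal process alert")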
class FileUtil(object):
    """
    File utility; uses a private class-level __logger to log information.
    """
    __logger = Logger(sys.modules['__main__'])

    def getFileListByExtension(self, dirPath, extension):
        """
        @summary: list the files in a directory that have the given extension
        @param dirPath: the directory path
        @param extension: the file extension name (without the leading dot)
        """
        extensionFileList = []
        extensionName = '.' + extension
        # get the directory's files
        fileList = os.listdir(dirPath)
        for file in fileList:
            # collect the file if its extension equals the given extension
            if os.path.splitext(file)[1] == extensionName:
                extensionFileList.append(file)
        return extensionFileList

    def writeToFile(self, filePath, content):
        """
        @summary: write the content to the file
        @param filePath: the file to write the content to; the encoding is utf8
        @param content: the content to write
        """
        # the file is opened in append mode with utf8 encoding
        fileWrObj = codecs.open(filePath, 'a', encoding='utf8')
        fileWrObj.write(content)
        fileWrObj.close()

    def readFromFile(self, filePath):
        """
        @summary: read the file content
        @param filePath: the path of the file to read
        @return: the content of the file, or None on failure
        """
        content = None
        try:
            # read the whole file content using utf8 encoding
            fileReadObj = codecs.open(filePath, 'r', encoding='utf8')
            content = fileReadObj.read()
            fileReadObj.close()
        except Exception:
            errMsg = 'read file ' + filePath + ' failure'
            self.__logger.error(errMsg)
        return content

    def getFilePath(self, fileName):
        """
        @summary: build a file path, e.g. os.path.dirname(sys.path[0]) + '/conf/teste.txt'
        @param fileName: the file name (with leading separator) to append
        """
        filePath = os.path.dirname(sys.path[0]) + fileName
        return filePath
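
# Usage sketch for FileUtil (paths and file names are illustrative only):
# file_util = FileUtil()
# for name in file_util.getFileListByExtension('/tmp', 'txt'):
#     print(name)
# file_util.writeToFile('/tmp/out.txt', u'hello\n')
# print(file_util.readFromFile('/tmp/out.txt'))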
def __init__(self, tool_name=None, path_for_log_file='tmp/', parameters=None):
    """
    Class constructor
    @param tool_name: tool name for debugging purposes
    @type tool_name: string
    @param path_for_log_file: path to save the logs generated by the tool
    @type path_for_log_file: string
    """
    self.tool_name = tool_name
    self.log = Logger(tool_name, path_for_log_file)
def _model_selection_helper(self, dataset_getter, experiment_class, config,
                            exp_config_name, other=None):
    """
    :param dataset_getter: provides the data for the experiment
    :param experiment_class: the experiment class to instantiate
    :param config: the model configuration to evaluate
    :param exp_config_name: the folder in which to store experiment results
    :param other: optional additional arguments forwarded to the experiment
    :return:
    """
    # Create the experiment object which will be responsible for running a specific experiment
    experiment = experiment_class(config, exp_config_name)

    # Set up a log file for this experiment (run in a separate process)
    logger = Logger(str(os.path.join(experiment.exp_path, 'experiment.log')), mode='a')
    logger.log('Configuration: ' + str(experiment.model_config))

    config_filename = os.path.join(experiment.exp_path, self._CONFIG_FILENAME)

    # ------------- PREPARE DICTIONARY TO STORE RESULTS -------------- #
    selection_dict = {
        'config': experiment.model_config.config_dict,
        'TR_score': 0.,
        'VL_score': 0.,
    }

    dataset_getter.set_inner_k(None)  # need to stay this way

    training_score, validation_score = experiment.run_valid(dataset_getter, logger, other)

    selection_dict['TR_score'] = float(training_score)
    selection_dict['VL_score'] = float(validation_score)

    logger.log('TR Accuracy: ' + str(training_score) +
               ' VL Accuracy: ' + str(validation_score))

    with open(config_filename, 'w') as fp:
        json.dump(selection_dict, fp)
from log.Logger import Logger
from common import JSON_TEMPLATE, get_answer_object
import copy
import json
import traceback

logger = Logger()


class JSONGenerator(object):
    def __init__(self):
        pass

    @staticmethod
    def write_response_to_file(response_content, file_path):
        try:
            with open(file_path, 'w') as fp:
                json.dump(response_content, fp)
            msg_string = 'saved file content in filepath - {}\n'.format(file_path)
            logger.info(msg_string)
            return file_path
        except Exception:
            logger.error(traceback.format_exc())

    @staticmethod
    def create_response(response_object):
        questions_map = response_object.get('question_map', dict())
        ques_to_altq_map = response_object.get('altq_map', dict())
        tag_term_map = response_object.get('tag_term_map', dict())
        response = {'faqs': [], 'synonyms': response_object.get('graph_synonyms', dict())}
        try:
import re
import logging
# from share.config.ConfigManager import ConfigManager
import datetime
# from share.language.StopWords import StopWords
from share.language.Lemmatize import Lemmatizer
import requests
from textblob import TextBlob
from log.Logger import Logger

# config_manager = ConfigManager()
# qna_conf = config_manager.load_config(key='qna')
# conf = config_manager.load_config(key='ontology_analyzer')
# remote_config = config_manager.load_config(key="remote_config")

oa_logger = Logger()  # logging.getLogger('ont_analyzer')

NODE_ID = 0
NODE_NAME = 1
SYNONYMS = 2
HAS_FAQS = 3
IS_MANDATORY = 4


class OntologyAnalyzer:
    def __init__(self):
        self.kt_id = None
        self.language = None
        self.doc_id = None
        # self.db_manager = DBManager()
        # self.ont_analyzer_db_manager = OADBManager()
class WeiBuClawer(object):
    '''
    claw the site: https://x.threatbook.cn
    '''
    __logger = Logger(sys.modules['__main__'])

    def clawDomain(self, domain):
        '''
        Crawl threat intelligence information by domain name.
        :param domain:
        :return:
        '''
        self.__logger.info('claw the site ' + domain)
        cdomain = "https://x.threatbook.cn/domain/" + domain
        response = requests.get(cdomain)
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup

    def clawIp(self, ip):
        '''
        Crawl threat intelligence information by IP.
        :param ip:
        :return:
        '''
        self.__logger.info('claw the ip : ' + ip)
        cIp = "https://x.threatbook.cn/ip/" + ip
        response = requests.get(cIp)
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup

    def parseIpGeroInfo(self, soup):
        '''
        Parse the page to extract the IP's geographic location information.
        :param soup:
        :return:
        '''
        self.__logger.info('parse the html to get ip gero info')
        geroInfo = ''
        infoTab = soup.find(
            "table", "table table-condensed table-borderless pull-left res-brief")
        trList = infoTab.find_all('tr')
        trSize = len(trList)
        if trSize > 2:
            geroTrObj = trList[1]
            geroInfo = geroTrObj.find('td').string.strip()
            if geroInfo is not None:
                geroInfo = ''.join(geroInfo.split())
        return geroInfo

    def parseWeibuThread(self, soup):
        '''
        Parse the page to extract ThreatBook (Weibu) intelligence tags.
        :param soup:
        :return:
        '''
        self.__logger.info('parse the html to get weibu thread')
        threadStrs = ""
        # threadTab = soup.find(id="tag_td")
        # spanTabList = threadTab.find_all("span", "tag non-clickable-tag")
        spanTabList = soup.find_all("span", "tag non-clickable-tag")
        slistSize = len(spanTabList)
        limitSize = slistSize - 1
        for ii in range(slistSize):
            threadStrs = threadStrs + spanTabList[ii].string
            if ii < limitSize:
                threadStrs = threadStrs + ","
        return threadStrs

    def parseComunityThread(self, soup):
        '''
        Parse the page to extract community intelligence tags.
        :param soup:
        :return:
        '''
        self.__logger.info('parse the html to get comunity thread')
        threadStrs = ""
        voteList = soup.find_all("span", "vb4-tag voted")
        slistSize = len(voteList)
        limitSize = slistSize - 1
        for ii in range(slistSize):
            threadStrs = threadStrs + voteList[ii].string
            if ii < limitSize:
                threadStrs = threadStrs + ","
        return threadStrs

    def parseThreads(self, soup):
        '''
        Parse the page to extract threat intelligence entries.
        :param soup:
        :return:
        '''
        self.__logger.info('parse the html to get threads')
        threadStrs = ""
        threads = soup.find(id="intelli_table")
        threadList = threads.find_all('td')
        listSize = len(threadList)
        limitSize = listSize - 1
        for ii in range(listSize):
            # every third cell holds the intelligence text
            if (ii + 1) % 3 == 0:
                if threadList[ii].string is None:
                    continue
                threadStrs = threadStrs + threadList[ii].string
                if ii < limitSize:
                    threadStrs = threadStrs + ","
        return threadStrs
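
# Usage sketch for WeiBuClawer (assumes network access to x.threatbook.cn and
# that the page layout still matches the selectors above):
# clawer = WeiBuClawer()
# soup = clawer.clawIp('8.8.8.8')
# print(clawer.parseIpGeroInfo(soup))
# print(clawer.parseWeibuThread(soup))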
def _model_selection_helper(self, dataset_getter, experiment_class, config,
                            exp_config_name, other=None):
    # Set up a log file for this experiment (run in a separate process)
    logger = Logger(str(os.path.join(exp_config_name, 'experiment.log')), mode='a')
    logger.log('Configuration: ' + str(config))

    config_filename = os.path.join(exp_config_name, self._CONFIG_FILENAME)

    # ------------- PREPARE DICTIONARY TO STORE RESULTS -------------- #
    k_fold_dict = {
        'config': config,
        'folds': [{} for _ in range(self.folds)],
        'avg_TR_score': 0.,
        'avg_VL_score': 0.,
        'std_TR_score': 0.,
        'std_VL_score': 0.
    }

    for k in range(self.folds):
        dataset_getter.set_inner_k(k)

        fold_exp_folder = os.path.join(exp_config_name, 'FOLD_' + str(k + 1))

        # Create the experiment object which will be responsible for running a specific experiment
        experiment = experiment_class(config, fold_exp_folder)

        training_score, validation_score = experiment.run_valid(dataset_getter, logger, other)

        logger.log(str(k + 1) + ' split, TR Accuracy: ' + str(training_score) +
                   ' VL Accuracy: ' + str(validation_score))

        k_fold_dict['folds'][k]['TR_score'] = training_score
        k_fold_dict['folds'][k]['VL_score'] = validation_score

    tr_scores = np.array([k_fold_dict['folds'][k]['TR_score'] for k in range(self.folds)])
    vl_scores = np.array([k_fold_dict['folds'][k]['VL_score'] for k in range(self.folds)])

    k_fold_dict['avg_TR_score'] = tr_scores.mean()
    k_fold_dict['std_TR_score'] = tr_scores.std()
    k_fold_dict['avg_VL_score'] = vl_scores.mean()
    k_fold_dict['std_VL_score'] = vl_scores.std()

    logger.log('TR avg is ' + str(k_fold_dict['avg_TR_score']) +
               ' std is ' + str(k_fold_dict['std_TR_score']) +
               ' VL avg is ' + str(k_fold_dict['avg_VL_score']) +
               ' std is ' + str(k_fold_dict['std_VL_score']))

    with open(config_filename, 'w') as fp:
        json.dump(k_fold_dict, fp)
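
# Standalone sketch (assumed names, not part of the source) of the
# fold-aggregation step above: given per-fold (TR, VL) score pairs,
# compute means and standard deviations with numpy.
import numpy as np

def aggregate_folds(fold_scores):
    tr = np.array([s[0] for s in fold_scores])
    vl = np.array([s[1] for s in fold_scores])
    return {'avg_TR_score': tr.mean(), 'std_TR_score': tr.std(),
            'avg_VL_score': vl.mean(), 'std_VL_score': vl.std()}

# aggregate_folds([(0.91, 0.85), (0.93, 0.84), (0.90, 0.86)])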
def run_final_model(self, outer_k, debug):
    outer_folder = osp.join(self._ASSESSMENT_FOLDER,
                            self._OUTER_FOLD_BASE + str(outer_k + 1))
    config_fname = osp.join(outer_folder, self._SELECTION_FOLDER,
                            self._WINNER_CONFIG)

    with open(config_fname, 'r') as f:
        best_config = json.load(f)

    dataset_getter_class = s2c(self.model_configs.dataset_getter)
    dataset_getter = dataset_getter_class(
        self.model_configs.data_root, self.splits_folder,
        s2c(self.model_configs.dataset_class),
        self.model_configs.dataset_name, self.outer_folds, self.inner_folds,
        self.model_configs.num_dataloader_workers,
        self.model_configs.pin_memory)

    # Tell the data provider to take data relative
    # to a specific OUTER split
    dataset_getter.set_outer_k(outer_k)
    dataset_getter.set_inner_k(None)

    # Mitigate bad random initializations
    for i in range(self.final_training_runs):
        final_run_exp_path = osp.join(outer_folder, f"final_run{i+1}")
        final_run_torch_path = osp.join(final_run_exp_path,
                                        f'run_{i+1}_results.torch')

        # Retrain with the best configuration and test
        # Set up a log file for this experiment (run in a separate process)
        logger = Logger(osp.join(final_run_exp_path, 'experiment.log'), mode='a')
        logger.log(
            json.dumps(dict(outer_k=dataset_getter.outer_k,
                            inner_k=dataset_getter.inner_k, **best_config),
                       sort_keys=False, indent=4))

        if not debug:

            @ray.remote(num_cpus=1, num_gpus=self.gpus_per_task)
            def foo():
                if not osp.exists(final_run_torch_path):
                    experiment = self.experiment_class(best_config['config'],
                                                       final_run_exp_path)
                    res = experiment.run_test(dataset_getter, logger)
                    torch.save(res, final_run_torch_path)
                return outer_k, i

            # Launch the job and append to the list of final-run jobs
            future = foo.remote()
            self.final_runs_job_list.append(future)
            self.progress_manager.update_state(
                dict(type='START_FINAL_RUN', outer_fold=outer_k, run_id=i))
        else:
            if not osp.exists(final_run_torch_path):
                experiment = self.experiment_class(best_config['config'],
                                                   final_run_exp_path)
                training_score, test_score = experiment.run_test(
                    dataset_getter, logger)
                torch.save((training_score, test_score),
                           final_run_torch_path)

    if debug:
        self.process_final_runs(outer_k)
def model_selection(self, kfold_folder, outer_k, debug):
    """
    Performs model selection by launching each configuration in parallel,
    unless debug is True. Each process trains the same configuration for
    each inner fold.
    :param kfold_folder: The root folder for model selection
    :param outer_k: the current outer fold to consider
    :param debug: whether to run the procedure in debug mode (no multiprocessing)
    """
    SELECTION_FOLDER = osp.join(kfold_folder, self._SELECTION_FOLDER)

    # Create the dataset provider
    dataset_getter_class = s2c(self.model_configs.dataset_getter)
    dataset_getter = dataset_getter_class(
        self.model_configs.data_root, self.splits_folder,
        s2c(self.model_configs.dataset_class),
        self.model_configs.dataset_name, self.outer_folds, self.inner_folds,
        self.model_configs.num_dataloader_workers,
        self.model_configs.pin_memory)

    # Tell the data provider to take data relative
    # to a specific OUTER split
    dataset_getter.set_outer_k(outer_k)

    if not osp.exists(SELECTION_FOLDER):
        os.makedirs(SELECTION_FOLDER)

    # if the # of configs to try is 1, simply skip model selection
    if len(self.model_configs) > 1:
        # Launch one job for each inner_fold for each configuration
        for config_id, config in enumerate(self.model_configs):
            # I need to make a copy of this dictionary
            # It seems it gets shared between processes!
            cfg = deepcopy(config)

            # Create a separate folder for each configuration
            config_folder = osp.join(SELECTION_FOLDER,
                                     self._CONFIG_BASE + str(config_id + 1))
            if not osp.exists(config_folder):
                os.makedirs(config_folder)

            for k in range(self.inner_folds):
                # Create a separate folder for each fold for each config.
                fold_exp_folder = osp.join(config_folder,
                                           self._INNER_FOLD_BASE + str(k + 1))
                fold_results_torch_path = osp.join(
                    fold_exp_folder, f'fold_{str(k+1)}_results.torch')

                # Tell the data provider to take data relative
                # to a specific INNER split
                dataset_getter.set_inner_k(k)

                logger = Logger(osp.join(fold_exp_folder, 'experiment.log'), mode='a')
                logger.log(
                    json.dumps(dict(outer_k=dataset_getter.outer_k,
                                    inner_k=dataset_getter.inner_k, **config),
                               sort_keys=False, indent=4))

                if not debug:

                    @ray.remote(num_cpus=1, num_gpus=self.gpus_per_task)
                    def foo():
                        if not osp.exists(fold_results_torch_path):
                            experiment = self.experiment_class(config, fold_exp_folder)
                            res = experiment.run_valid(dataset_getter, logger)
                            torch.save(res, fold_results_torch_path)
                        return dataset_getter.outer_k, dataset_getter.inner_k, config_id

                    # Launch the job and append to list of outer jobs
                    future = foo.remote()
                    self.outer_folds_job_list.append(future)
                    self.progress_manager.update_state(
                        dict(type='START_CONFIG', outer_fold=outer_k,
                             inner_fold=k, config_id=config_id))
                else:  # debug mode
                    if not osp.exists(fold_results_torch_path):
                        experiment = self.experiment_class(config, fold_exp_folder)
                        training_score, validation_score = experiment.run_valid(
                            dataset_getter, logger)
                        torch.save((training_score, validation_score),
                                   fold_results_torch_path)

            if debug:
                self.process_config(config_folder, deepcopy(config))

        if debug:
            self.process_inner_results(SELECTION_FOLDER, config_id)
    else:
        # Performing model selection for a single configuration is useless
        with open(osp.join(SELECTION_FOLDER, self._WINNER_CONFIG), 'w') as fp:
            json.dump(dict(best_config_id=0, config=self.model_configs[0]),
                      fp, sort_keys=False, indent=4)
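
# Minimal sketch of the ray fan-out pattern used above, with a placeholder task
# instead of experiment.run_valid; all names here are illustrative only.
import ray

ray.init(ignore_reinit_error=True)

@ray.remote(num_cpus=1)
def _train_one(config_id, fold_id):
    # stand-in for training one configuration on one inner fold
    return config_id, fold_id

futures = [_train_one.remote(c, k) for c in range(3) for k in range(5)]
print(ray.get(futures))  # blocks until all model-selection jobs complete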
class CsvUtil(object):
    __logger = Logger(sys.modules['__main__'])

    def readCsvRntnList(self, filePath):
        '''
        Read the contents of a csv file and return them.
        :param filePath:
        :return:
        '''
        # return list
        rntnList = []
        # open the csv file for reading with utf-8 encoding
        file = open(filePath, 'r', encoding="utf-8")
        reader = csv.reader(file)
        for line in reader:
            rntnList.append(line)
        file.close()
        return rntnList

    def readCsvRntnDictList(self, filePath):
        '''
        Read a csv and return a List[dict, dict, ..., dict].
        :param filePath:
        :return:
        '''
        rntnDictList = []
        file = open(filePath, 'r', encoding="utf-8")
        reader = csv.reader(file)
        # current csv line: reader.line_num
        headRow = next(reader)
        totalColumns = len(headRow)
        for row in reader:
            rowDict = {}
            for ii in range(totalColumns):
                rowDict[headRow[ii]] = row[ii]
            rntnDictList.append(rowDict)
        file.close()
        return rntnDictList

    def writeContent2Csv(self, filePath, valueList):
        '''
        Write content to a csv file.
        :param filePath:
        :param valueList:
        :return:
        '''
        file = open(filePath, 'w', newline='')
        writer = csv.writer(file)
        writer.writerows(valueList)
        file.close()

    def writeList2Csv(self, filePath, valueList, headList):
        '''
        Write content to a csv file.
        :param filePath:
        :param valueList:
        :param headList:
        :return:
        '''
        file = open(filePath, 'w', newline='')
        writer = csv.writer(file)
        writer.writerow(headList)
        writer.writerows(valueList)
        file.close()

    def writeDictList2Csv(self, filePath, dictList, headList):
        '''
        Write content to a csv file.
        :param filePath:
        :param dictList:
        :param headList:
        :return:
        '''
        file = open(filePath, 'w', newline='')
        writer = csv.writer(file)
        writer.writerow(headList)
        totalColumns = len(headList)
        for rowDict in dictList:
            row = []
            for ii in range(totalColumns):
                row.append(rowDict[headList[ii]])
            writer.writerow(row)
        file.close()
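
# Usage sketch for CsvUtil (file path and values are illustrative only):
# csv_util = CsvUtil()
# csv_util.writeList2Csv('/tmp/out.csv', [['1', 'a'], ['2', 'b']], ['id', 'name'])
# print(csv_util.readCsvRntnDictList('/tmp/out.csv'))  # [{'id': '1', 'name': 'a'}, ...]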
# add question
import json
import random
import string
import nltk
from nltk.corpus import stopwords
import csv
import copy
import pandas as pd
import uuid
from indentAPI import Indent

api = Indent()

from log.Logger import Logger

oa = Logger()


def getAltTagsCsv(q, terms, tags):
    # tokenize the question so that we can extract the necessary parts of speech
    temp = nltk.word_tokenize(q.lower())
    temp = nltk.pos_tag(temp)
    # verbs in this question (every verb POS tag starts with 'V')
    q_verbs = [
        pair[0] for pair in temp
        if pair[1][0] == 'V' and pair[0] not in stopwords.words("english")
        and pair[0].isalpha()
    ]
    # adjectives in this question (every adjective POS tag starts with 'J')
    q_adj = [
        pair[0] for pair in temp
        if pair[1][0] == 'J' and pair[0] not in stopwords.words("english")
        and pair[0].isalpha()
    ]
import sys
import os
import traceback
import re
import datetime
import csv
import itertools
from anytree import Node, PreOrderIter

sys.path.append(str(os.getcwd()))
from analyzer.kg_export.config.config import ontology_analyzer as conf
from analyzer.kg_export.config.config import SYNONYM_DELIMITER, TRAIT_DELIMITER
from log.Logger import Logger
from analyzer.kg_export.language.StopWords import StopWords
from analyzer.kg_export.language.Lemmatize import Lemmatizer
from analyzer.kg_export.language.StringProcessor import StringProcessor

oa_logger = Logger()
string_processor = StringProcessor()

NODE_ID = 0
NODE_NAME = 1
SYNONYMS = 2
HAS_FAQS = 3
IS_MANDATORY = 4

ALLOWED_CHECKS = {
    'unreachable_questions': 'UnreachableQuestions',
    'questions_at_root': 'QuestionsAtRoot',
    'longest_identical_subtree_cousins': 'longest_identical_subtree_cousins',
    'leaves_without_faqs': 'leaves_without_faqs',
    'chains_of_nodes': 'chains_of_nodes',
    'repeated_node_names': 'repeated_node_names',
    # 'tree_too_long': 'tree_too_long',
    # 'better_matched_paths': 'better_matched_paths',
class ExcelUtil(object):
    __logger = Logger(sys.modules['__main__'])

    def readXlsxExcelSheetContentList(self, filePath, sheetName, head=True):
        '''
        Read the content of a given sheet in an excel file and return it as a
        List[list, list, ...]. Only supports the .xlsx format.
        :param filePath:
        :param sheetName:
        :return:
        '''
        # sheet content list
        valueList = []
        try:
            wb = openpyxl.load_workbook(filePath)
            sheet = wb[sheetName]
            rowObjArray = list(sheet.rows)
            totalRow = sheet.max_row
            # include the head row only when requested
            startRow = 0 if head else 1
            for rowIndex in range(startRow, totalRow):
                row = rowObjArray[rowIndex]
                tmp = []
                for cell in range(len(row)):
                    tmp.append(row[cell].value)
                valueList.append(tmp)
        except Exception:
            self.__logger.error("Failed to read the Excel file!")
        return valueList

    def readXlsxExcelSheetContentDictList(self, filePath, sheetName):
        '''
        Read the content of a given sheet in an excel file and return it as a
        List[dict, dict, ...]. Only supports the .xlsx format.
        :param filePath:
        :param sheetName:
        :return:
        '''
        # sheet content list
        valueList = []
        try:
            wb = openpyxl.load_workbook(filePath)
            sheet = wb[sheetName]
            # sheet.rows is a generator and cannot be indexed directly
            rowObjArray = list(sheet.rows)
            totalRow = sheet.max_row
            startRow = 1
            headRow = rowObjArray[0]
            totalColumns = sheet.max_column
            # read the data rows
            for rowIndex in range(startRow, totalRow):
                row = rowObjArray[rowIndex]
                tmp = {}
                for cell in range(totalColumns):
                    tmp[headRow[cell].value] = row[cell].value
                valueList.append(tmp)
        except Exception:
            self.__logger.error("Failed to read the Excel file!")
        return valueList

    def readExcelSheetContentList(self, filePath, sheetName, head=True):
        '''
        Read the content of a given sheet in an excel file and return it as a
        List[list, list, ...]. Supports all excel formats.
        :param filePath:
        :param sheetName:
        :return:
        '''
        # sheet content list
        valueList = []
        try:
            wb = xlrd.open_workbook(filePath)
            sheet = wb.sheet_by_name(sheetName)
            totalRow = sheet.nrows
            # include the head row only when requested
            startRow = 0 if head else 1
            for rowIndex in range(startRow, totalRow):
                valueList.append(sheet.row_values(rowIndex))
        except Exception:
            self.__logger.error("Failed to read the Excel file!")
        return valueList

    def readExcelSheetContentDictList(self, filePath, sheetName):
        '''
        Read the content of a given sheet in an excel file and return it as a
        List[dict, dict, ...]. Supports all excel formats.
        :param filePath:
        :param sheetName:
        :return:
        '''
        # sheet content list
        valueList = []
        try:
            wb = xlrd.open_workbook(filePath)
            sheet = wb.sheet_by_name(sheetName)
            totalRow = sheet.nrows
            headRow = sheet.row_values(0)
            totalColumns = sheet.ncols
            # read the data rows
            for rowIndex in range(1, totalRow):
                row = sheet.row_values(rowIndex)
                tmp = {}
                for cell in range(totalColumns):
                    tmp[headRow[cell]] = row[cell]
                valueList.append(tmp)
        except Exception:
            self.__logger.error("Failed to read the Excel file!")
        return valueList

    def writeList2XlsxExcel(self, filePath, sheetName, valueList):
        '''
        Write the content to an Excel file. Only supports the xlsx format.
        :param filePath:
        :param sheetName:
        :param valueList: list[list, list, ...]
        :return:
        '''
        try:
            wb = openpyxl.Workbook()
            sheet = wb.active
            sheet.title = sheetName
            for ii in range(0, len(valueList)):
                for jj in range(0, len(valueList[ii])):
                    sheet.cell(row=ii + 1, column=jj + 1, value=str(valueList[ii][jj]))
            wb.save(filePath)
        except Exception:
            self.__logger.error("Failed to save the file")

    def writeList2XlsxExcelWithHead(self, filePath, sheetName, valueList, headList):
        '''
        Write the content to an Excel file. Only supports the xlsx format.
        :param filePath:
        :param sheetName:
        :param valueList:
        :param headList:
        :return:
        '''
        try:
            wb = openpyxl.Workbook()
            sheet = wb.active
            sheet.title = sheetName
            # write head
            for jj in range(0, len(headList)):
                sheet.cell(row=1, column=jj + 1, value=str(headList[jj]))
            # write contents
            for ii in range(0, len(valueList)):
                for jj in range(0, len(valueList[ii])):
                    sheet.cell(row=ii + 2, column=jj + 1, value=str(valueList[ii][jj]))
            wb.save(filePath)
        except Exception:
            self.__logger.error("Failed to save the file")

    def writeDictList2XlsxExcelWithHead(self, filePath, sheetName, dictValueList, headList):
        '''
        Write the content to an Excel file. Only supports the xlsx format.
        :param filePath:
        :param sheetName:
        :param dictValueList:
        :param headList:
        :return:
        '''
        try:
            wb = openpyxl.Workbook()
            sheet = wb.active
            sheet.title = sheetName
            # write head
            totalColumns = len(headList)
            for jj in range(0, totalColumns):
                sheet.cell(row=1, column=jj + 1, value=str(headList[jj]))
            # write contents
            for ii in range(0, len(dictValueList)):
                dictRow = dictValueList[ii]
                for jj in range(0, totalColumns):
                    sheet.cell(row=ii + 2, column=jj + 1, value=dictRow[headList[jj]])
            wb.save(filePath)
        except Exception:
            self.__logger.error("Failed to save the file")

    def writeList2Excel(self, filePath, sheetName, valueList):
        '''
        Write the content to an Excel file. Supports all Excel formats.
        :param filePath:
        :param sheetName:
        :param valueList: list[list, list, ...]
        :return:
        '''
        try:
            wb = xlwt.Workbook()
            sheet = wb.add_sheet(sheetName, cell_overwrite_ok=True)
            for ii in range(len(valueList)):
                for jj in range(len(valueList[ii])):
                    sheet.write(ii, jj, valueList[ii][jj])
            wb.save(filePath)
        except Exception:
            self.__logger.error("Failed to save the file")

    def writeList2ExcelWithHead(self, filePath, sheetName, valueList, headList):
        '''
        Write the content to an Excel file.
        :param filePath:
        :param sheetName:
        :param valueList:
        :param headList:
        :return:
        '''
        try:
            wb = xlwt.Workbook()
            sheet = wb.add_sheet(sheetName, cell_overwrite_ok=True)
            for jj in range(len(headList)):
                sheet.write(0, jj, headList[jj])
            for ii in range(len(valueList)):
                for jj in range(len(valueList[ii])):
                    sheet.write(ii + 1, jj, valueList[ii][jj])
            wb.save(filePath)
        except Exception:
            self.__logger.error("Failed to save the file")

    def writeDictList2ExcelWithHead(self, filePath, sheetName, valueDictList, headList):
        '''
        Write the content to an Excel file.
        :param filePath:
        :param sheetName:
        :param valueDictList: dict list ===> list[dict, dict, ...]
        :param headList: head list
        :return:
        '''
        try:
            wb = xlwt.Workbook()
            sheet = wb.add_sheet(sheetName, cell_overwrite_ok=True)
            totalColumns = len(headList)
            for jj in range(totalColumns):
                sheet.write(0, jj, headList[jj])
            for ii in range(len(valueDictList)):
                rowDict = valueDictList[ii]
                for jj in range(totalColumns):
                    sheet.write(ii + 1, jj, rowDict[headList[jj]])
            wb.save(filePath)
        except Exception:
            self.__logger.error("Failed to save the file")

    def combine2ExcelContent(self, file01Path, sheet01Name, excel01Index,
                             file02Path, sheet02Name, excel02Index,
                             resultFilePath, resultSheetName):
        '''
        Join the contents of two Excel sheets on a given column
        (similar to a SQL join).
        :param file01Path:
        :param sheet01Name:
        :param excel01Index:
        :param file02Path:
        :param sheet02Name:
        :param excel02Index:
        :param resultFilePath:
        :param resultSheetName:
        :return:
        '''
        excel01ValueList = self.readExcelSheetContentList(file01Path, sheet01Name)
        excel02ValueList = self.readExcelSheetContentList(file02Path, sheet02Name)
        resultValueList = []
        for ii in range(len(excel01ValueList)):
            tmp = []
            for jj in range(len(excel02ValueList)):
                if str(excel01ValueList[ii][excel01Index]) == str(
                        excel02ValueList[jj][excel02Index]):
                    for xx in range(len(excel01ValueList[ii])):
                        if xx == excel01Index:
                            continue
                        excel02ValueList[jj].append(excel01ValueList[ii][xx])
                    tmp = excel02ValueList[jj]
                    resultValueList.append(tmp)
                    break
        self.writeList2Excel(resultFilePath, resultSheetName, resultValueList)
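
# Usage sketch for ExcelUtil (assumes openpyxl is installed; values illustrative):
# excel_util = ExcelUtil()
# excel_util.writeList2XlsxExcelWithHead('/tmp/out.xlsx', 'Sheet1',
#                                        [[1, 'a'], [2, 'b']], ['id', 'name'])
# print(excel_util.readXlsxExcelSheetContentDictList('/tmp/out.xlsx', 'Sheet1'))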