def auto_read_picture_move_to_standard_without_label(file_list, width, height,
                                                     outdir=r'./temp',
                                                     channel=3, num_threads=2,
                                                     stay=1):
    """Stage images as PNG files in ``outdir`` and read them back in one pass.

    Every entry of ``file_list`` is handed to ``fh.image_change_to`` with the
    target ``<outdir>/<basename>.png`` and the requested ``width``/``height``.
    Whatever ``fh.get_files_by_types(outdir, '')`` reports afterwards is fed to
    ``read_pic``; the number of staged files is passed as its last two
    positional arguments.

    Args:
        file_list: Paths of the source images.
        width: Forwarded to ``fh.image_change_to`` and ``read_pic``.
        height: Forwarded to ``fh.image_change_to`` and ``read_pic``.
        outdir: Staging directory; created when it does not exist.
        channel: Forwarded to ``read_pic``.
        num_threads: Forwarded to ``read_pic``.
        stay: When equal to ``0``, ``fh.finish_files_toproject()`` is called
            before returning.

    Returns:
        A ``(names, image)`` tuple: the basenames extracted from the file
        names produced by ``get_file_batchlist`` and the image data it
        returned.
    """
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    for source_path in file_list:
        target_path = (outdir + "/" + fh.get_path_file_basename(source_path)
                       + ".png")
        fh.image_change_to(source_path, target_path, width, height)

    staged_files = fh.get_files_by_types(outdir, '')
    staged_count = len(staged_files)
    name_batch, pixel_batch = read_pic(staged_files, width, height, channel,
                                       num_threads, staged_count, staged_count)
    raw_names, image = get_file_batchlist(name_batch, pixel_batch)

    # Each raw name carries the path between single quotes; pull it out and
    # reduce it to its basename.
    names = []
    for raw_name in raw_names:
        quoted_path = sh.getstringexep(r"'(.*?)'", raw_name)[0]
        names.append(fh.get_path_file_basename(quoted_path))

    if stay == 0:
        fh.finish_files_toproject()
    return names, image
def init_load(ignorePar=False):
    """Run ``predict_csv`` over the test CSV files and write a summary CSV.

    Nothing happens unless at least one of the module-level flags
    ``testmode`` / ``onlytest`` or the ``ignorePar`` argument is truthy.
    Otherwise the previous ``outpredictcsv`` is removed, the ``.csv`` files
    under ``test_data`` are collected (restricted to the slice
    ``[starttest:starttest + testnum]`` when only ``testmode`` is set),
    sorted by the integer value of their basename, and each one is passed to
    ``predict_csv`` with a threshold argument of ``0.5``.

    The output file has one row per input file with the columns
    ``filename``, ``root_node`` and ``triggername``.  When the prediction
    lacks the expected entries both ``root_node`` and ``triggername`` are
    written as ``"0"``.

    Args:
        ignorePar: When truthy, process every CSV file regardless of the
            ``testmode`` / ``onlytest`` flags.
    """
    if not (testmode or onlytest or ignorePar):
        return

    fh.remove_file(outpredictcsv)

    fileslist = fh.get_files_by_types(test_data, ".csv")
    if not (onlytest or ignorePar):
        # Only ``testmode`` is set: restrict the run to the configured window.
        fileslist = fileslist[starttest:starttest + testnum]
    fileslist.sort(key=lambda elem: int(fh.get_path_file_basename(elem)))

    filename = []
    root_node = []
    triggername = []
    for subfile in fileslist:
        predict = predict_csv(subfile, 0.5)
        filename.append(fh.get_path_file_basename(subfile))
        # Build both values before touching the result lists so that an
        # IndexError on either lookup cannot leave the columns with
        # different lengths.
        try:
            rootname = "node_" + str(predict[0][1])
            trigger = "主机" + rootname + " " + predict[0][2]
        except IndexError:
            rootname = "0"
            trigger = "0"
        root_node.append(rootname)
        triggername.append(trigger)

    df = pd.DataFrame()
    df['filename'] = filename
    df['root_node'] = root_node
    df['triggername'] = triggername
    df.to_csv(outpredictcsv, encoding="utf_8_sig", header=True, index=False)
def imagebatchs_enlarge_booleanimage_cnninfo(fileslist, imagebatchs, twidth,
                                             theight, width, height,
                                             outdir=None, type=".png",
                                             picmode="L", gaussianblur=False,
                                             mfilter=(5, 5), mvalue=0.0):
    """Write an enlarged image per batch entry, then reload the directory.

    For every index of ``fileslist`` the matching entry of ``imagebatchs`` is
    multiplied by 255, passed through ``fh.out_booleanimage_pre`` and
    ``enlarge_booleanimage_cnninfo`` (with ``twidth``/``theight``), and saved
    via ``fh.array_to_image`` as ``<target dir>/<basename><type>``.

    Args:
        fileslist: File paths; they supply the output names and, when
            ``outdir`` is falsy, the target directory (taken from the first
            entry via ``fh.get_path_file_subpath``).
        imagebatchs: Image arrays, indexed in step with ``fileslist``.
        twidth: Forwarded to ``enlarge_booleanimage_cnninfo``.
        theight: Forwarded to ``enlarge_booleanimage_cnninfo``.
        width: Forwarded to ``fh.imagedir_to_arrays_tfable``.
        height: Forwarded to ``fh.imagedir_to_arrays_tfable``.
        outdir: Target directory; falsy means "derive it from ``fileslist``".
        type: Suffix appended to every output file name.
        picmode: Forwarded to ``fh.array_to_image``.
        gaussianblur: Forwarded to ``fh.imagedir_to_arrays_tfable``.
        mfilter: Forwarded to ``fh.imagedir_to_arrays_tfable``.
        mvalue: Forwarded to ``fh.imagedir_to_arrays_tfable``.

    Returns:
        Whatever ``fh.imagedir_to_arrays_tfable`` returns for the target
        directory.
    """
    target_dir = outdir or fh.get_path_file_subpath(fileslist[0])

    for index, source_path in enumerate(fileslist):
        scaled = np.multiply(imagebatchs[index], 255)
        enlarged = enlarge_booleanimage_cnninfo(
            fh.out_booleanimage_pre(scaled), twidth, theight)
        destination = (target_dir + "/"
                       + fh.get_path_file_basename(source_path) + type)
        fh.array_to_image(enlarged, destination, picmode=picmode)

    return fh.imagedir_to_arrays_tfable(
        target_dir, type, True, width, height,
        gaussianblur=gaussianblur, mfilter=mfilter, mvalue=mvalue)
def bayesAlgorithm(trainPath, testPath, tfidfspace_out_arr_path,
                   tfidfspace_out_word_path, testspace_out_arr_path,
                   testspace_out_word_apth):
    """Fit a multinomial naive Bayes model and return ordered predictions.

    The train and test sets are loaded with ``readBunch``.  A
    ``MultinomialNB(alpha=0.001)`` classifier is fitted on the training
    ``tdm``/``label`` attributes, string dumps of both sets are written with
    ``saveFile``, and the test ``tdm`` is classified.

    Args:
        trainPath: Location passed to ``readBunch`` for the training set.
        testPath: Location passed to ``readBunch`` for the test set.
        tfidfspace_out_arr_path: Receives ``str(trainSet.tdm)``.
        tfidfspace_out_word_path: Receives ``str(trainSet)``.
        testspace_out_arr_path: Receives ``str(testSet)``.
        testspace_out_word_apth: Receives ``str(testSet.label)``.

    Returns:
        A list of predictions reordered so that they follow the ascending
        integer value of each test file's basename.
    """
    train_bunch = readBunch(trainPath)
    test_bunch = readBunch(testPath)

    # alpha=0.001 is the smoothing parameter given to MultinomialNB (the
    # original note stated that a smaller alpha gives higher precision;
    # that claim is unverified).
    classifier = MultinomialNB(alpha=0.001)
    classifier = classifier.fit(train_bunch.tdm, train_bunch.label)

    # --- dump the data sets to disk -------------------------------------
    train_dumps = (
        (tfidfspace_out_arr_path, str(train_bunch.tdm)),   # matrix form
        (tfidfspace_out_word_path, str(train_bunch)),      # text form
    )
    for dump_path, dump_text in train_dumps:
        saveFile(dump_path, dump_text)

    # NOTE(review): unlike the training dumps, the "arr" file receives the
    # whole test set and the "word" file only its labels - confirm intent.
    test_dumps = (
        (testspace_out_arr_path, str(test_bunch)),
        (testspace_out_word_apth, str(test_bunch.label)),
    )
    for dump_path, dump_text in test_dumps:
        saveFile(dump_path, dump_text)
    # --- end of dumps ---------------------------------------------------

    predicted = classifier.predict(test_bunch.tdm)

    # zip() limits the walk to the shortest of labels, file names and
    # predictions; only the file name is needed for the sort key.
    file_numbers = [
        int(fh.get_path_file_basename(file_name))
        for _label, file_name, _category in zip(
            test_bunch.label, test_bunch.filenames, predicted)
    ]

    ordering = np.argsort(file_numbers)
    return [predicted[position] for position in ordering]
def auto_read_picture_move_to_standard(file_list, width, height, labellist,
                                       outdir=r'./temp', channel=3,
                                       num_threads=2, stay=1, type=".jpg",
                                       default=None, nodefault=False):
    """Stage images in ``outdir``, read them back and attach their labels.

    Every entry of ``file_list`` is handed to ``fh.image_change_to`` with the
    target ``<outdir>/<basename><type>`` and the requested
    ``width``/``height``.  Whatever ``fh.get_files_by_types(outdir, '')``
    reports afterwards is fed to ``read_pic``; the number of staged files is
    passed as its last two positional arguments.  Labels are matched to the
    files that were read back through their basename.

    Args:
        file_list: Paths of the source images.
        width: Forwarded to ``fh.image_change_to`` and ``read_pic``.
        height: Forwarded to ``fh.image_change_to`` and ``read_pic``.
        labellist: Labels, paired positionally with ``file_list``.
        outdir: Staging directory; created when it does not exist.
        channel: Forwarded to ``read_pic``.
        num_threads: Forwarded to ``read_pic``.
        stay: When equal to ``0``, ``fh.finish_files_toproject()`` is called
            before returning.
        type: Suffix appended to every staged file name.
        default: Label used for a file that has no entry in ``labellist``.
        nodefault: When true, files without a label are left out of the
            returned label list instead of receiving ``default``; the label
            list can then be shorter than the name list.

    Returns:
        A ``(names, image, labels)`` tuple: the basenames extracted from the
        file names produced by ``get_file_batchlist``, the image data it
        returned, and the labels looked up for those basenames.
    """
    filelabelmap = dict(
        zip([fh.get_path_file_basename(one) for one in file_list], labellist))

    if not os.path.exists(outdir):
        os.makedirs(outdir)
    for one in file_list:
        fh.image_change_to(
            one, outdir + "/" + fh.get_path_file_basename(one) + type,
            width, height)

    train_listtest = fh.get_files_by_types(outdir, '')
    filename_batch, image_batch = read_pic(train_listtest, width, height,
                                           channel, num_threads,
                                           len(train_listtest),
                                           len(train_listtest))
    filename, image = get_file_batchlist(filename_batch, image_batch)
    filenamebatchlist = [
        fh.get_path_file_basename(sh.getstringexep(r"'(.*?)'", one)[0])
        for one in filename
    ]

    # The previous filter kept a label only when it was falsy AND
    # ``nodefault`` was set, so the default call always produced an empty
    # list.  Keep every label, and drop only the unlabelled files when the
    # caller asked for no defaults.  Membership is tested on the key so a
    # legitimate falsy label such as ``0`` is not mistaken for "missing".
    finallabel = [
        filelabelmap.get(one, default)
        for one in filenamebatchlist
        if one in filelabelmap or not nodefault
    ]

    if stay == 0:
        fh.finish_files_toproject()
    tf.reset_default_graph()
    return filenamebatchlist, image, finallabel
def auto_read_picture(file_list, width, height, labellist, channel=3,
                      num_threads=2):
    """Read the given images in one pass and return them with their labels.

    ``file_list`` is fed to ``read_pic``; the number of files is passed as
    its last two positional arguments.  The file names that come back from
    ``get_file_batchlist`` are reduced to their basename, and each one is
    matched to the label of the input file with the same basename.

    Args:
        file_list: Paths of the images to read.
        width: Forwarded to ``read_pic``.
        height: Forwarded to ``read_pic``.
        labellist: Labels, paired positionally with ``file_list``.
        channel: Forwarded to ``read_pic``.
        num_threads: Forwarded to ``read_pic``.

    Returns:
        A ``(names, image, labels)`` tuple whose ``names`` and ``labels``
        follow the order in which the files were read back.

    Raises:
        KeyError: If a file name that was read back has no matching entry in
            ``file_list``.
    """
    # Key the labels by basename: the names read back below are reduced to
    # their basename too, so keying by the raw path could never match a file
    # that lives in a directory or carries an extension.  Two inputs sharing
    # a basename collapse onto one key (the later label wins).
    filelabelmap = dict(
        zip([fh.get_path_file_basename(one) for one in file_list], labellist))

    filename_batch, image_batch = read_pic(file_list, width, height, channel,
                                           num_threads, len(file_list),
                                           len(file_list))
    filename, image = get_file_batchlist(filename_batch, image_batch)
    filenamebatchlist = [
        fh.get_path_file_basename(sh.getstringexep(r"'(.*?)'", one)[0])
        for one in filename
    ]
    finallabel = [filelabelmap[one] for one in filenamebatchlist]
    return filenamebatchlist, image, finallabel