def get_file(file_dir, label_map):
    """Collect image paths and numeric labels for every class directory.

    Scans ``file_dir/<label>`` for each label in ``label_map``, logs the
    per-class counts, and concatenates everything into two flat arrays.

    Args:
        file_dir: root directory containing one sub-directory per label.
        label_map: mapping of label name -> numeric class id.

    Returns:
        (image_list, label_list): parallel 1-D arrays of file paths and
        numeric labels.

    NOTE(review): the original (Chinese) comments mention shuffling the
    combined lists, but no shuffle is performed here — confirm whether the
    caller shuffles downstream.
    """
    images = {label: [] for label in label_map}
    label2num = {label: [] for label in label_map}

    # Step 1: gather every file path under each class directory, with a
    # parallel list of numeric labels.
    for label, num in label_map.items():
        class_dir = os.path.join(file_dir, label)
        for file in os.listdir(class_dir):
            images[label].append(os.path.join(class_dir, file))
            label2num[label].append(num)

    # Report how many images were found per class, as a sanity check.
    for label in label_map:
        logger.info("There are {} {}".format(len(images[label]), label))

    # Step 2: concatenate the per-class lists column-wise into flat arrays.
    image_list = np.hstack(list(images.values()))
    label_list = np.hstack(list(label2num.values()))
    return image_list, label_list
def evaluate(self, eval_dir, logs_dir):
    """Predict every image under ``eval_dir`` and append a summary report.

    Runs ``self.predict`` on each image, then appends per-class counts
    (true vs. predicted) and the overall accuracy to
    ``<logs_dir>/D3W3S7_test.txt``.

    Args:
        eval_dir: directory laid out per ``self.label_map`` (one
            sub-directory per class), consumed by ``get_file``.
        logs_dir: directory receiving the appended report file.
    """
    eval_image, eval_label = get_file(eval_dir, self.label_map)
    predictions = []
    with tf.Graph().as_default():
        # enumerate replaces the original manual counter.
        for i, image in enumerate(get_image(eval_image)):
            predictions.append(self.predict(image, self.model_dir))
            if i % 100 == 0:
                logger.info("eval process {}".format(i))

    # Hoisted: the original computed the correct-count twice.
    num_correct = sum(1 for p, t in zip(predictions, eval_label) if p == t)
    total = len(eval_label)
    # Guard against an empty eval set (original raised ZeroDivisionError).
    accuracy = num_correct / total if total else 0.0

    # Context manager guarantees the report file is closed even if a
    # write fails (the original leaked the handle on exception).
    with open(os.path.join(logs_dir, "D3W3S7_test.txt"), "a") as eval_txt:
        for label, num in self.label_map.items():
            eval_txt.write("\n num_{}: {}, predict_{}: {}".format(
                label,
                sum(1 for t in eval_label if t == num),
                label,
                sum(1 for p in predictions if p == num),
            ))
        eval_txt.write(
            "\n num_total: {}, num_correct: {}, accuracy= {:.2%} ".format(
                total, num_correct, accuracy))
def call(self, context):
    """Download the DICOM files of the first series whose window center is
    negative, record its seriesUid, and write the metadata to data.json.

    Returns the output directory path, or None when no series qualifies.
    """
    args = context.args
    data = args.inputData1          # payload dict containing a 'series' list
    data_path = args.outputData1    # destination directory
    data_file = os.path.join(data_path, "data.json")
    series = data['series']
    for ser in series:
        logger.debug(ser['windowCenter'])
    # One flag per series: is the first '\'-separated windowCenter value negative?
    # (The comprehension variable rebinds `ser` from the debug loop above.)
    windC = [int(ser['windowCenter'].split('\\')[0]) < 0 for ser in series]
    logger.debug(windC)
    if not sum(windC):
        logger.info("Nothing to download.")
        return
    # First series with a negative window center wins.
    ser = series[windC.index(True)]
    dicoms_path = path.mkdirs(os.path.join(data_path, "dicoms"))
    logger.info("Downloading: {} files".format(len(ser['files'])))
    # Retry each file up to 3 times; fan out one thread worker per file.
    download = runtime.retry(stop_max_attempt_number=3)(functools.partial(downloading, path=dicoms_path))
    asyncio.map(download, ser['files'], thread=True, pbar=True, workers=len(ser['files']))
    data['seriesUid'] = ser['seriesUid']
    # NOTE(review): stdlib json.dump expects a file object, not a path string;
    # this only works if `json` here is a path-accepting wrapper — confirm the
    # module's import (elsewhere in this codebase json.load is given a file object).
    json.dump(data, data_file)
    return data_path
def SPImageBox(context):
    """Crop input images either by per-image annotation boxes (project.json)
    or by one fixed xy rectangle, saving crops to outputImage; any image not
    handled by a crop branch is copied through unchanged.

    Returns the output image directory path.
    """
    args = context.args
    images = args.inputImage
    files = []  # inputs already handled by one of the crop branches
    try:
        if args.inputData:
            # Branch 1: per-image boxes from an annotation project file.
            jsonFile = os.path.join(args.inputData, "project.json")
            with open(jsonFile, "rb") as load_f:
                fileInfo = json.load(load_f)
            for i, j in fileInfo["metadata"].items():
                files.append(os.path.join(images.folder, j["vid"]))
                try:
                    # Preferred output name: <stem>_<attribute "1" value>.png
                    filename = (os.path.splitext(j["vid"])[0] + "_" + j["av"]["1"] + ".png")
                except:
                    # Fallback when the attribute is missing: use the metadata key suffix.
                    filename = (os.path.splitext(j["vid"])[0] + "_" + i.split("_")[-1] + ".png")
                xy = j["xy"][1:]  # assumes layout [?, x, y, w, h] — TODO confirm
                img = image.read(os.path.join(images.folder, j["vid"]))
                image.save(
                    os.path.join(
                        args.outputImage,
                        filename,
                    ),
                    # Crop rows y..y+h, cols x..x+w, all channels.
                    img[int(xy[1]):int(xy[1] + xy[3]),
                        int(xy[0]):int(xy[0] + xy[2]), :, ],
                )
        elif args.xy:
            # Branch 2: one fixed rectangle applied to every image.
            for idx, img in enumerate(images):
                files.append(images.images[idx])
                # A missing width/height extends the crop to the image border.
                x = int(args.xy[0] + args.x) if args.x else img.shape[1] + 1
                y = int(args.xy[1] + args.y) if args.y else img.shape[0] + 1
                image.save(
                    os.path.join(
                        args.outputImage,
                        # Drops the first 8 storage-path components — presumably
                        # the storage key prefix; verify against the deployment.
                        storage.delimiter.join(images.images[idx].split(
                            storage.delimiter)[8:]),
                    ),
                    img[int(args.xy[1]):y,
                        int(args.xy[0]):x, :, ],
                )
    except:
        # NOTE(review): this bare except also swallows crop/read/save errors,
        # not only a missing or malformed project.json.
        logger.info("can not find project.json or json format error")
    # Pass through every image that no crop branch handled.
    for idx, img in enumerate(images):
        if images.images[idx] not in files:
            image.save(
                os.path.join(
                    args.outputImage,
                    storage.delimiter.join(images.images[idx].split(
                        storage.delimiter)[8:]),
                ),
                img,
            )
    return args.outputImage
def _predict(self, X):
    """Predict with the wrapped estimator, normalising the output so that
    rows are labels: single-label output becomes shape (1, n_samples),
    multi-label output is transposed to (n_labels, n_samples)."""
    logger.info("model is an estimator, use predict()")
    raw = self.modelInstance.predict(X)
    n_labels = raw.shape[1] if len(raw.shape) > 1 else 1
    if n_labels > 1:
        return raw.T
    return raw.reshape(1, len(raw))
def call(self, context):
    """Post the nodule prediction payload back to the study callback URL.

    Rehydrates the serialised 'nodule_preb' dict into a DataFrame, POSTs the
    formatted result, and raises on any HTTP error status.
    """
    data = context.args.inputData1
    data['nodule_preb'] = pd.DataFrame(data['nodule_preb'])
    callback_url = URL.format(data['customStudyInstanceUid'])
    response = requests.post(callback_url, data=success_ret_info(data))
    response.raise_for_status()  # surface any non-2xx reply
    logger.info(response.text)
def SPPredictor(context):
    """Run the detection model on every image under inputData, write
    visualisations to outputData2, and return the predictions as plain
    Python structures plus the visualisation directory.

    Any head the model does not produce (boxes/classes/masks/scores/
    keypoints) falls back to an empty list.
    """
    args = context.args
    model = args.inputModel
    device = "cuda" if args.__gpu else "cpu"
    logger.info(
        "**********Use {} Device for Predicting**********".format(device))
    model.device(device)
    model.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.ScoreThreshTest
    fileType = (".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm", ".tif",
                ".tiff", ".webp")
    images = find_files(args.inputData, fileType)
    outputs = model.predict(images)
    model.visualizer(images, outputs, args.outputData2)

    def _field(inst, name):
        # Absent heads fall back to an empty tensor so .tolist() is safe.
        return getattr(inst, name, torch.tensor([])).tolist()

    pred_data = []
    for output in outputs:
        inst = output["instances"]
        boxes = getattr(getattr(inst, "pred_boxes", None), "tensor",
                        torch.tensor([]))
        pred_data.append({
            "image_size": getattr(inst, "image_size", ()),
            "pred_boxes": boxes.tolist(),
            "pred_classes": _field(inst, "pred_classes"),
            "pred_masks": _field(inst, "pred_masks"),
            "scores": _field(inst, "scores"),
            "pred_keypoints": _field(inst, "pred_keypoints"),
        })
    return pred_data, args.outputData2
def train(self, X, y=None):
    """Fit ``self.modelInstance``, optionally selecting the best CV fold.

    With cross-validation enabled, every fold is fitted and scored and the
    estimator is finally refitted on the training split of the best-scoring
    fold; otherwise the estimator is fitted on all of X/y.

    Args:
        X: feature matrix, indexable by the CV splitter's index arrays.
        y: targets aligned with X (may be None for unsupervised fits).
    """
    if not self.needTrain:
        return
    if self.needCrossVal and self.cvInstance:
        logger.info("Using cross validation...")
        bestScore = None
        bestX = bestY = None
        for trainIndex, testIndex in self.cvInstance.split(X):
            xTrain, xTest = X[trainIndex], X[testIndex]
            yTrain, yTest = y[trainIndex], y[testIndex]
            # Each fold fits the shared estimator; the final refit below
            # overwrites whatever the last fold left behind.
            self.modelInstance.fit(xTrain, yTrain)
            score = self.modelInstance.score(xTest, yTest)
            # Bug fix: the original never updated its running maximum, so
            # it effectively kept the LAST positive-scoring fold rather
            # than the best one.
            if bestScore is None or score > bestScore:
                bestScore = score
                bestX, bestY = xTrain, yTrain
        # Guard: the original raised NameError when the splitter yielded
        # no folds at all.
        if bestX is not None:
            self.modelInstance.fit(bestX, bestY)
    else:
        self.modelInstance.fit(X, y)
def input_int(self, s, default_value, valid_list=None, help_message=None):
    """Prompt for an integer, returning ``default_value`` on any failure.

    NOTE(review): the interactive read is stubbed out (``user_input = ""``),
    so in its current form every call logs the prompt and immediately
    returns ``default_value`` via the exception path — confirm whether a
    real ``input()`` call was removed deliberately.
    """
    while True:
        try:
            logger.info(s)
            user_input = ""  # stubbed: no actual console read
            if not user_input:
                raise ValueError("")
            if help_message is not None and user_input == "?":
                print(help_message)
                continue
            parsed = int(user_input)
            if valid_list is not None and parsed not in valid_list:
                return default_value
            return parsed
        except:
            # Any failure (including the stub above) falls back to the default.
            print(default_value)
            return default_value
def input_bool(self, s, default_value, help_message=None):
    """Prompt for a yes/no answer, returning ``default_value`` on failure.

    NOTE(review): the interactive read is stubbed out (``user_input = ""``),
    so in its current form every call logs the prompt and immediately
    returns ``default_value`` via the exception path — confirm whether a
    real ``input()`` call was removed deliberately.
    """
    answers = {"y": True, "n": False, "1": True, "0": False}
    while True:
        try:
            logger.info(s)
            user_input = ""  # stubbed: no actual console read
            if not user_input:
                raise ValueError("")
            if help_message is not None and user_input == "?":
                print(help_message)
                continue
            return bool(answers.get(user_input.lower(), default_value))
        except:
            # Any failure (including the stub above) falls back to the default.
            print("y" if default_value else "n")
            return default_value
def call(self, context):
    """Run lung-nodule detection, classification and lobe localisation on
    preprocessed arrays, returning the enriched metadata plus a flattened
    image built from the picked slices.

    Raises:
        Exception: "Predict timeout" when the detector raises FunctionTimedOut.
    """
    args = context.args
    input_path = args.inputData1
    npys_path = os.path.join(input_path, "npys")
    # NOTE(review): stdlib json.load expects a file object; this passes a
    # path string, so `json` is presumably a path-accepting wrapper —
    # confirm the module's import.
    data = json.load(os.path.join(input_path, "data.json"))
    npys = [
        os.path.join(npys_path, "case.npy"),
        os.path.join(npys_path, "spacing.npy"),
        os.path.join(npys_path, "prep_data.npy"),
        os.path.join(npys_path, "prep_mask.npy"),
        os.path.join(npys_path, "extendbox.npy"),
        os.path.join(npys_path, "imgs.npy"),
        os.path.join(npys_path, "coord.npy"),
        os.path.join(npys_path, "nzhw.npy"),
    ]
    # Load all arrays concurrently; unpack order matches the list above.
    case, spacing, prep_data, prep_mask, extendbox, imgs, coord, nzhw = asyncio.map(
        npy.load, npys, thread=True, pbar=True, workers=len(npys))
    imgs = torch.from_numpy(imgs)
    coord = torch.from_numpy(coord)
    try:
        # Pipeline: detection -> nodule classification -> lobe localisation.
        nodule_df, pbb = self.lung_dete.prediction(imgs, coord, nzhw, spacing, extendbox, batch=self.args.batch)
        preb = self.lung_isnc.nodule_cls(nodule_df, case, spacing)
        preb = self.lung_lobe(preb, case, prep_mask, spacing)
        data["nodule_preb"] = preb.to_dict()
        logger.info("Sending predicted images...")
        images = np.array([img for img in pickWithPbb(prep_data, pbb)])
        return data, convert.flatAsImage(images)
    except FunctionTimedOut:
        raise Exception("Predict timeout")
def SPPredictor(context):
    """Run the detection model on every image found under inputData and
    return the predictions as plain Python structures.

    Returns:
        list[dict]: one entry per image with image_size, pred_boxes,
        pred_classes, pred_masks and scores (empty lists for heads the
        model does not produce).
    """
    args = context.args
    model = args.inputModel
    device = "cuda" if args.__gpu else "cpu"
    logger.info(
        "**********Use {} Device for Predicting**********".format(device))
    model.device(device)
    model.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.ScoreThreshTest
    fileType = (
        ".jpg",
        ".jpeg",
        ".png",
        ".ppm",
        ".bmp",
        ".pgm",
        ".tif",
        ".tiff",
        ".webp",
    )
    images = find_files(args.inputData, fileType)
    outputs = model.predict(images)
    pred_data = []
    for output in outputs:
        instances = output["instances"]
        # Fix: the original accessed every field directly and raised
        # AttributeError for models that do not populate a head (e.g. no
        # pred_masks on a pure detection model). Guard each field instead.
        boxes = getattr(instances, "pred_boxes", None)
        pred_data.append({
            "image_size":
            getattr(instances, "image_size", ()),
            "pred_boxes":
            boxes.tensor.tolist() if boxes is not None else [],
            "pred_classes":
            instances.pred_classes.tolist()
            if hasattr(instances, "pred_classes") else [],
            "pred_masks":
            instances.pred_masks.tolist()
            if hasattr(instances, "pred_masks") else [],
            "scores":
            instances.scores.tolist()
            if hasattr(instances, "scores") else [],
        })
    return pred_data
def SPTrainer(context):
    """Register datasets, train the detection model, and evaluate it.

    Falls back to the training image/json for the test set when either
    test input is missing.

    Returns:
        (model, outputData): the trained model and the evaluation output path.
    """
    args = context.args
    model = args.inputModel
    device = "cuda" if args.__gpu else "cpu"
    logger.info(
        "**********Use {} Device for Training**********".format(device))
    trainImg = args.inputTrainImage
    trainJson = os.path.join(args.inputTrainJson, "project.json")
    # Evaluate the "no test set supplied" condition once (the original
    # repeated the same check separately for testImg and testJson).
    if args.inputTestImage is None or args.inputTestJson is None:
        testImg, testJson = trainImg, trainJson
    else:
        testImg = args.inputTestImage
        testJson = os.path.join(args.inputTestJson, "project.json")
    # NOTE(review): stdlib json.load expects a file object; this passes a
    # path string, so `json` is presumably a path-accepting wrapper —
    # confirm the module's import.
    jsonData = json.load(trainJson)
    classes = list(jsonData["attribute"]["1"]["options"].values())
    model.dataset_register(trainImg, testImg, trainJson, testJson, classes)
    model.device(device)
    params = {
        "IMS_PER_BATCH": args.ImgsPerBatch,
        "BASE_LR": args.BaseLR,
        "MAX_ITER": args.MaxIter,
        "BATCH_SIZE_PER_IMAGE": args.BatchSizePerImg,
        "NUM_CLASSES": len(classes),
        "NUM_WORKERS": args.NumWorkers,
    }
    model.set_params(**params)
    trainer = model.train()
    model.evaluate(trainer, args.outputData)
    return model, args.outputData
def input_str(self, s, default_value, valid_list=None, help_message=None):
    """Prompt for a string, returning ``default_value`` on any failure.

    NOTE(review): the interactive read is stubbed out (``user_input = ""``),
    so in its current form every call logs the prompt and immediately
    returns ``default_value`` via the exception path — confirm whether a
    real ``input()`` call was removed deliberately.
    """
    while True:
        try:
            logger.info(s)
            user_input = ""  # stubbed: no actual console read
            if not user_input:
                raise ValueError("")
            if help_message is not None and user_input == "?":
                print(help_message)
                continue
            if valid_list is not None:
                # Prefer the case-folded match, then the exact match,
                # otherwise fall back to the default.
                if user_input.lower() in valid_list:
                    return user_input.lower()
                if user_input in valid_list:
                    return user_input
                return default_value
            return user_input
        except:
            # Any failure (including the stub above) falls back to the default.
            print(default_value)
            return default_value
from suanpan.storage import storage
from suanpan.interfaces import HasArguments
from suanpan.objects import Context
from suanpan.log import logger


def getCleanArgs(argsDict):
    """Drop entries whose value is falsy (None, "", 0, ...)."""
    return {key: value for key, value in argsDict.items() if value}


# Fix: the original parsed the environment and built a Context twice;
# build it once and reuse it. storePath is read before any further
# processing, matching the original's fresh-Context lookup.
context = Context(**HasArguments.getArgsDictFromEnv())
storePath = getattr(context, "configFile", None)
kwds = getCleanArgs(
    HasArguments.defaultArgumentsFormat(context, storage.ARGUMENTS))
storageType = kwds["type"]
logger.info(f"Download Config File From {storageType} : {storePath}.")
storage.setBackend(**kwds)
storage.download(storePath, "config/application.properties")
def _transform(self, X):
    """Apply the wrapped transformer to ``X`` and return the result."""
    # Fix: log-message grammar ("an transformer" -> "a transformer").
    logger.info("model is a transformer, use transform()")
    return self.modelInstance.transform(X)
def train(self, image, label, logs_dir, model_dir, **kwargs):
    """Train the TF1 classification graph, checkpointing and logging as it goes.

    Args:
        image, label: input tensors (typically queue-fed batches).
        logs_dir: directory for TensorBoard summaries and the text log.
        model_dir: directory for model.pkl metadata and checkpoints.
        **kwargs: learning_rate, n_classes, max_step, decay_steps,
            end_learning_rate, power, cycle — all optional with defaults.

    Returns:
        model_dir (also stored on self.model_dir).
    """
    learning_rate = kwargs.pop("learning_rate", 0.0001)
    n_classes = kwargs.pop("n_classes", 7)
    max_step = kwargs.pop("max_step", 4501)
    decay_steps = kwargs.pop("decay_steps", 233)
    end_learning_rate = kwargs.pop("end_learning_rate", 0.000001)
    power = kwargs.pop("power", 0.5)
    cycle = kwargs.pop("cycle", False)
    self.n_classes = n_classes
    # Build the training graph: forward pass, loss, optimizer, accuracy.
    train_logits = inference(image, n_classes)
    train_loss = losses(train_logits, label)
    train_op = trainning(train_loss, learning_rate)
    train_acc = evaluation(train_logits, label)
    # Merged summary op for TensorBoard.
    summary_op = tf.summary.merge_all()
    # TF1 session setup, checkpoint saver, variable init.
    sess = tf.Session()
    train_writer = tf.summary.FileWriter(logs_dir, sess.graph)
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    # Queue coordinator + runner threads feeding the input pipeline.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    P.mkdirs(model_dir)
    # Persist the model's shape metadata next to the checkpoints.
    with open(os.path.join(model_dir, "model.pkl"), "wb") as f:
        pickle.dump(
            {
                "n_classes": self.n_classes,
                "img_h": self.img_h,
                "img_w": self.img_w,
            },
            f,
        )
    # Batch training loop: one batch per step, up to max_step steps.
    try:
        for step in np.arange(max_step):
            if coord.should_stop():
                break
            _, tra_loss, tra_acc = sess.run(
                [train_op, train_loss, train_acc])
            # NOTE(review): this rebinds the Python variable to a decay
            # tensor, but train_op was already built with the original
            # float learning rate — the decay schedule is never actually
            # applied to the optimizer. Wiring it in would need a
            # global_step variable passed to the optimizer; confirm intent.
            learning_rate = tf.train.polynomial_decay(
                learning_rate=learning_rate,
                global_step=step,
                decay_steps=decay_steps,
                end_learning_rate=end_learning_rate,
                power=power,
                cycle=cycle,
            )
            cycle = False
            if step % decay_steps == 0:
                cycle = True
            # Every 100 steps: append loss/acc to the text log, write a
            # summary, and save a checkpoint (same path each time, so only
            # the latest state is kept).
            if step % 100 == 0:
                train_txt = open(
                    os.path.join(logs_dir, "D3W3S7_train.txt"), "a")
                train_txt.write(
                    "Step %d, train loss = %.2f, train accuracy = %.2f%% \n"
                    % (step, tra_loss, tra_acc * 100.0))
                train_txt.close()
                summary_str = sess.run(summary_op)
                train_writer.add_summary(summary_str, step)
                checkpoint_path = os.path.join(model_dir, "D3W3S7.ckpt")
                saver.save(sess, checkpoint_path)
                logger.info("Step %d training done" % (step))
    except tf.errors.OutOfRangeError:
        logger.info("Done training -- epoch limit reached")
    finally:
        # Always stop the queue threads and release the session.
        coord.request_stop()
        coord.join(threads)
        sess.close()
    self.model_dir = model_dir
    return self.model_dir