def merge(pkl_paths, output_dir):
    if os.path.exists(output_dir):
        raise ValueError('Output directory {} already exists. '
                         'Converted dirs should be combined manually.'.format(output_dir))
    os.makedirs(output_dir)

    # Collect studies from annotation file
    all_series = []
    for pkl_path in pkl_paths:
        with open(pkl_path, 'rb') as pkl_file:
            add_series = pickle.load(pkl_file)
        all_series += add_series

    # Check for duplicate series
    unique_series = {(s.study_name, s.series_number) for s in all_series}
    if len(unique_series) < len(all_series):
        raise RuntimeError('Found duplicate series in directories {}.'
                           .format(', '.join(pkl_paths)))

    # Write summary file for all series
    util.print_err('Dumping pickle file...')
    with open(os.path.join(output_dir, 'series_list.pkl'), 'wb') as pkl_file:
        pickle.dump(all_series, pkl_file)
    util.print_err('Dumping JSON file...')
    with open(os.path.join(output_dir, 'series_list.json'), 'w') as json_file:
        json.dump([dict(series) for series in all_series],
                  json_file, indent=4, sort_keys=True, default=util.json_encoder)
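# Hypothetical usage sketch (not part of the original module): combine the
# series lists of two previously converted directories into a fresh output
# directory. The paths below are illustrative only.
merge(pkl_paths=['converted_a/series_list.pkl', 'converted_b/series_list.pkl'],
      output_dir='converted_combined')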
def restructure_directory(input_dir, output_dir, json_path):
    with open(json_path, 'r') as json_file:
        dcm_dict = json.load(json_file)
    for acc, series_dict in tqdm(dcm_dict.items()):
        folder = os.path.join(input_dir, acc)
        subfolder = [s for s in os.listdir(folder) if s.startswith('ST')]
        assert len(subfolder) == 1, "Multiple subfolders present in {}.".format(acc)
        subfolder = subfolder[0]
        # Skip accessions with more than one series
        if len(series_dict) > 1:
            continue
        try:
            for series_acq, inst_num in series_dict.items():
                dcm = util.read_dicom(os.path.join(folder, subfolder, inst_num['1'][0]))
                description = dcm.SeriesDescription.replace('/', ' ')
                Path(os.path.join(output_dir, acc, description)).mkdir(parents=True, exist_ok=True)
                series_num = series_acq.split('_')[0]
                # Skip series with fewer than 10 instances
                if len(inst_num) < 10:
                    continue
                for i in range(1, len(inst_num) + 1):
                    source_path = os.path.join(folder, subfolder, inst_num[str(i)][0])
                    dest_path = os.path.join(output_dir, acc, description, inst_num[str(i)][0])
                    shutil.copy(source_path, dest_path)
                    os.rename(dest_path,
                              os.path.join(output_dir, acc, description,
                                           'IM-' + series_num.zfill(4) + '-' + str(i).zfill(4) + '.dcm'))
        except Exception:
            util.print_err('Error occurred while copying {}. Skipping...'.format(acc))
            continue
def print_result(self, results):
    # Pick the signed stress with the largest magnitude (may be negative in compression)
    abs_min = abs(min(results))
    abs_max = abs(max(results))
    max_value = max(results) if abs_max > abs_min else min(results)
    pos = results.index(max_value) * self.load_case.step
    print("Maximum stress in bottom panel: {0:.3e} [Pa] at {1:.2f} [m]".format(max_value, pos))
    if max_value > self.wing_box.material.yield_stress:
        util.print_err("Wing box failed: bottom panel stress exceeded yield stress.")
def req_clarity_bvc(img_data):
    request_pb = general_classify_client.GeneralClassifyRequest()
    request_pb.image = img_data
    classify_type = request_pb.classify_type.add()
    classify_type.type_name = 'clarity'
    classify_type.topnum = 1
    request_str = request_pb.SerializePartialToString()
    logid = random.randint(1000000, 100000000)
    req_array = {
        'appid': '123456',
        'logid': logid,
        'format': 'json',
        'from': 'test-python',
        'cmdid': '123',
        'clientip': '0.0.0.0',
        'data': base64.b64encode(request_str),
    }
    req_json = json.dumps(req_array)
    # conf.api['req_clarity_bvc'] is overridden by a hard-coded server choice
    url = random.choice([
        'http://10.156.86.15:8134/GeneralClassifyService/classify',
        'http://10.156.86.15:8135/GeneralClassifyService/classify'
    ])
    req = urllib2.Request(url)
    req.add_header('Content-Type', 'application/json')
    res = None
    for i in range(1, 50):
        try:
            response = urllib2.urlopen(req, req_json, 1)
            res_str_tmp = response.read()
            json_res = json.loads(res_str_tmp)
            if "err_no" not in json_res:
                return "no err_no"
            if json_res["err_no"] != 0:
                return "err_no is not 0\t" + res_str_tmp
            res_pb = general_classify_client.GeneralClassifyResponse()
            res_pb.ParseFromString(base64.b64decode(json_res['result']))
            for result in res_pb.result:
                if result.type_name == "clarity":
                    # (p + 6) / 12 maps a score in [-6, 6] onto [0, 1]
                    res = (result.probability[0] + 6) / 12
                    break
            return res  # success: stop retrying
        except Exception as e:
            util.print_err(e)
            time.sleep(1)
    return res
def print_result(self, min_margin):
    print("")
    print("Results for skin buckling")
    failure = False
    for section in self.wing_box.sections:
        if min_margin[section][0] < 1:
            failure = True
        print("Wing box section range: {0:.2f}, {1:.2f} [m]; "
              "Lowest margin of safety: {2:.2f} on plate with width {3:.2f} [m]"
              .format(section.start_y, section.end_y,
                      min_margin[section][0], min_margin[section][1].width))
    if failure:
        util.print_err("Wing box failed due to skin buckling")
def print_result(self, min_margin):
    print("")
    print("Results for shear buckling")
    failure = False
    for section in self.wing_box.sections:
        if min_margin[section][0] < 1:
            failure = True
        print("Wing box section range: {0:.2f}, {1:.2f} [m]; "
              "Lowest margin of safety: {2:.2f} on {3}"
              .format(section.start_y, section.end_y, min_margin[section][0],
                      "front spar" if min_margin[section][1] else "back spar"))
    if failure:
        util.print_err("Wing box failed due to shear buckling")
def __init__(self, config: Dict[str, str]):
    if 'detector_model' not in config:
        print_err('Error: not found "detector_model" in config')
        raise ValueError('missing "detector_model" in config')
    self.detector = Detector(config['detector_model'])
    if 'chrome_web_driver' in config:
        self.registry = RegistryRequester(selenium_driver_path=config['chrome_web_driver'])
    else:
        self.registry = RegistryRequester()
def super_resolution_bvc(img_file_path, url, option='super_resolution'):
    with open(img_file_path) as f:
        logid = random.randint(1000000, 100000000)
        requestinfo = {
            'image': base64.b64encode(f.read()),
            'type_name': 'image_restoration',
            'option': option,
        }
    data = json.dumps(requestinfo)
    req_array = {
        'jsonrpc': '2.0',
        'method': 'classify',
        'id': '123',
        'params': [{
            'appid': '123456',
            'logid': logid,
            'format': 'json',
            'from': 'test-python',
            'cmdid': '123',
            'clientip': '0.0.0.0',
            'data': base64.b64encode(data),
        }]
    }
    req_json = json.dumps(req_array)
    img = None
    res = None
    res_json = None
    for i in range(1, 50):
        try:
            req = urllib2.Request(url[0])
            req.add_header('Content-Type', 'application/json')
            response = urllib2.urlopen(req, req_json, 10000)
            res = json.loads(response.read())
            res_json = json.loads(base64.b64decode(res['result']['_ret']['result']))
            img_str = base64.b64decode(res_json['image'])
            nparr = np.frombuffer(img_str, np.uint8)  # np.fromstring is deprecated
            img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            break
        except Exception as e:
            util.print_err(e)
            util.print_err("bvc req failed. changing server")
            util.print_err(res)
            util.print_err(res_json)
            time.sleep(5)
            # url is a one-element list, so the new address is visible to the caller
            url[0] = get_server_addr()
            util.print_err(url)
    return img
def print_result(self, min_margin):
    print("")
    print("Results for column buckling")
    failure = False
    for section in self.wing_box.sections:
        if min_margin[section][0] < 1:
            failure = True
        print("Wing box section range: {0:.2f}, {1:.2f} [m]; "
              "Lowest margin of safety: {2:.2f} on {3} set with size {4}, {5} [m]"
              .format(section.start_y, section.end_y, min_margin[section][0],
                      min_margin[section][1].stringer_type.name,
                      min_margin[section][1].stringer_width,
                      min_margin[section][1].stringer_height))
    if failure:
        util.print_err("Wing box failed due to column buckling")
def transitions_from_args(args) -> Dict[Binding, Transition]:
    transitions_by_binding = {}
    for binding in Binding:
        transition = getattr(args, binding.value)
        if transition is None:
            continue
        if transition not in transitions:
            print_err(f'error: undefined transition {transition} for binding {binding.value}')
            return
        transitions_by_binding[binding] = transitions[transition]
    return transitions_by_binding
def get_server_addr():
    proxy_url = conf.api['bvc_proxy']
    addr = None
    for i in range(10):
        try:
            res_proxy = json.loads(util.http_get(proxy_url))
            server_json = random.choice(res_proxy['result']['ServerInfo'])
            addr = server_json['Server']['service_addr'][0]
            break
        except Exception as e:
            util.print_err(e)
            util.print_err("getting proxy url...")
            time.sleep(1)
    if addr is None:
        raise RuntimeError('Could not fetch a server address from %s' % proxy_url)
    # Keep the advertised host but use the fixed service port
    ip = addr.split(':')[0]
    port = 40077
    url = "http://%s:%s/1" % (ip, port)
    return url
def run_test_setup(test_root: Path) -> Tuple[CompatTest, str]:
    """
    Runs the user through prompts to get the minimum amount of information to
    return a new CompatTest.
    """
    title = input('Enter a human readable title for your test (e.g. "Add a protocol method"): ')

    # initialize FIDL file
    fidl_name = input(f'Enter name for initial {pink("FIDL")} file (e.g. "before.test.fidl"): ')
    fidl_name = prepend_step(fidl_name, step=0)
    fidl_library_name = test_name_to_fidl_name(test_root.name)
    scaffolding.initialize_fidl(test_root / FIDL_DIR / fidl_name, fidl_library_name)
    fidl_ref: FidlRef = stem(fidl_name)

    # initialize bindings
    bindings = {}
    for binding in BINDINGS:
        filename = input(
            f'Enter name for initial {pink(binding)} file (e.g. "before.{EXTENSIONS[binding]}"), '
            'or leave empty to skip binding: ')
        if not filename:
            continue
        filename = prepend_step(filename, step=0)
        scaffolding.initialize_src(test_root / binding / filename, binding, fidl_library_name)
        bindings[binding] = Steps(starting_fidl=fidl_ref,
                                  starting_src=f'{binding}/{filename}',
                                  steps=[])
    if not bindings:
        print_err('Must include at least one binding to define a test')
        sys.exit(1)

    new_test = CompatTest(
        title=title,
        fidl={fidl_ref: FidlDef(source=f'{FIDL_DIR}/{fidl_name}', instructions=[])},
        bindings=bindings)
    return (new_test, fidl_name)
def main(args):
    df = pd.read_csv(args.csv_path)
    examples = [CatalPhoto(url=str(row['netpublish_URL']), annotation=None)
                for _, row in df.iterrows()]

    # Make directories for holding photos
    for dir_name in ('wb_pos', 'wb_neg', 'unlabeled'):
        os.makedirs(os.path.join(args.output_dir, dir_name), exist_ok=True)

    session = requests.Session()
    retry = Retry(connect=3, backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)

    # Download photos
    for example in tqdm(examples):
        if example.is_labeled:
            subdir_name = 'wb_{}'.format('pos' if example.has_whiteboard else 'neg')
        else:
            subdir_name = 'unlabeled'
        file_name = '{}.jpg'.format(example.record_id)
        img_path = os.path.join(args.output_dir, subdir_name, file_name)
        if os.path.exists(img_path):
            util.print_err('Already downloaded {}'.format(img_path))
            continue

        url = example.url.replace('original', 'preview')
        try:
            response = session.get(url, stream=True, timeout=10)
            with open(img_path, 'wb') as out_file:
                shutil.copyfileobj(response.raw, out_file)
            del response
        except Exception as e:
            print('Error downloading from {}: {}'.format(url, e))
            continue

        # Down-sample the image
        if args.resize_shape is not None:
            img = Image.open(img_path, 'r').convert('RGB')
            img = img.resize(args.resize_shape)
            img.save(img_path)
def test(args):
    model, ckpt_info = ModelSaver.load_model(args.ckpt_path, args.gpu_ids)
    args.start_epoch = ckpt_info['epoch'] + 1
    model = model.to(args.device)
    model.eval()

    data_loader = get_loader(args, phase=args.phase, is_training=False)
    logger = TestLogger(args, len(data_loader.dataset))

    # Get model outputs, log to TensorBoard, write masks to disk window-by-window
    util.print_err('Writing model outputs to {}...'.format(args.results_dir))
    all_gender = []
    all_age = []
    all_tte = []
    all_is_alive = []
    all_mu = []
    all_s2 = []
    with tqdm(total=len(data_loader.dataset), unit=' windows') as progress_bar:
        for i, (src, tgt) in enumerate(data_loader):
            all_gender.extend([int(x) for x in src[:, 0]])
            all_age.extend([float(x) for x in src[:, 1]])
            all_tte.extend([float(x) for x in tgt[:, 0]])
            all_is_alive.extend([int(x) for x in tgt[:, 1]])
            with torch.no_grad():
                pred_params = model.forward(src.to(args.device))
            outputs = pred_params.cpu().numpy()
            all_mu.extend([float(x) for x in outputs[:, 0]])
            all_s2.extend([float(x) for x in outputs[:, 1]])
            progress_bar.update(src.size(0))

    # Write predicted parameters (mu, s2) alongside the inputs to a CSV file
    with open(os.path.join(args.results_dir, 'test_stats.csv'), 'w') as fd:
        fd.write('gender, age, tte, is_alive, mu, s2\n')
        for gender, age, tte, is_alive, mu, s2 \
                in zip(all_gender, all_age, all_tte, all_is_alive, all_mu, all_s2):
            fd.write('%d, %f, %f, %d, %f, %f\n' % (gender, age, tte, is_alive, mu, s2))
def fine_tuning_parameters(self, fine_tuning_boundary, fine_tuning_lr=0.0):
    """Get parameters for fine-tuning the model.

    Args:
        fine_tuning_boundary: Name of first layer after the fine-tuning layers.
        fine_tuning_lr: Learning rate to apply to fine-tuning layers
            (all layers before `fine_tuning_boundary`).

    Returns:
        List of dicts that can be passed to an optimizer.
    """
    def gen_params(boundary_layer_name, fine_tuning):
        """Generate parameters, if fine_tuning generate the params before
        boundary_layer_name. If unfrozen, generate the params at
        boundary_layer_name and beyond."""
        saw_boundary_layer = False
        for name, param in self.named_parameters():
            if name.startswith(boundary_layer_name):
                saw_boundary_layer = True

            if saw_boundary_layer and fine_tuning:
                return
            elif not saw_boundary_layer and not fine_tuning:
                continue
            else:
                yield param

    # Layers before the boundary get the (smaller) fine-tuning learning rate
    optimizer_parameters = [{'params': gen_params(fine_tuning_boundary, fine_tuning=True),
                             'lr': fine_tuning_lr},
                            {'params': gen_params(fine_tuning_boundary, fine_tuning=False)}]

    # Debugging info
    util.print_err('Number of fine-tuning layers: {}'
                   .format(sum(1 for _ in gen_params(fine_tuning_boundary, fine_tuning=True))))
    util.print_err('Number of regular layers: {}'
                   .format(sum(1 for _ in gen_params(fine_tuning_boundary, fine_tuning=False))))

    return optimizer_parameters
def get_series_numbers(args):
    with open(os.path.join(args.output_dir, 'dir2type.json'), 'r') as json_fh:
        dir2type = json.load(json_fh)

    df = pd.read_csv(args.input_csv)
    for i, row in df.iterrows():
        series_dir = os.path.join(args.data_dir, str(row['Acc']))
        if os.path.exists(series_dir):
            print('Found at {}'.format(series_dir))
            for subdir in os.listdir(series_dir):
                # Prompt the user to label directories we haven't seen before
                if subdir not in dir2type:
                    while True:
                        try:
                            input_num = int(input('{} (0=contrast, 1=other)?\n>>> '.format(subdir)))
                            if input_num == 0 or input_num == 1:
                                break
                        except ValueError:
                            continue
                    dir2type[subdir] = 'contrast' if input_num == 0 else 'non_contrast'
                if dir2type[subdir] == 'contrast':
                    print('{} is contrast'.format(subdir))
                    dcm_dir = os.path.join(series_dir, subdir)
                    dcm_names = [f for f in os.listdir(dcm_dir) if f.endswith('.dcm')]
                    dcm = util.read_dicom(os.path.join(dcm_dir, dcm_names[0]))
                    df.loc[i, 'CTA se'] = int(dcm.SeriesNumber)

    # Write CSV and dir2type mapping
    util.print_err('Dumping CSV file...')
    df.to_csv(os.path.join(args.output_dir, 'updated_annotations.csv'))
    util.print_err('Dumping JSON file...')
    with open(os.path.join(args.output_dir, 'dir2type.json'), 'w') as json_fh:
        json.dump(dir2type, json_fh, indent=4, sort_keys=True, default=util.json_encoder)
def test(args):
    model, ckpt_info = ModelSaver.load_model(args.ckpt_path, args.gpu_ids)
    args.start_epoch = ckpt_info['epoch'] + 1
    model = model.to(args.device)
    model.eval()

    data_loader = CIFARLoader('val', args.batch_size, args.num_workers)

    # Get model outputs, log to TensorBoard, write masks to disk window-by-window
    util.print_err('Writing model outputs to {}...'.format(args.results_dir))
    with tqdm(total=len(data_loader.dataset), unit=' examples') as progress_bar:
        for i, (inputs, info_dict) in enumerate(data_loader):
            with torch.no_grad():
                logits = model.forward(inputs.to(args.device))
                probs = F.softmax(logits, dim=-1)  # dim is required in modern PyTorch
            # TODO: Test script is incomplete. Does nothing with the outputs.
            progress_bar.update(inputs.size(0))
def get_parameters(model, args):
    """Get parameter generators for a model.

    Args:
        model: Model to get parameters from.
        args: Command-line arguments.

    Returns:
        Dictionary of parameter generators that can be passed to a PyTorch optimizer.
    """
    def gen_params(boundary_layer_name, fine_tuning):
        """Generate parameters, if fine_tuning generate the params before
        boundary_layer_name. If unfrozen, generate the params at
        boundary_layer_name and beyond."""
        saw_boundary_layer = False
        for name, param in model.named_parameters():
            if name.startswith(boundary_layer_name):
                saw_boundary_layer = True

            if saw_boundary_layer and fine_tuning:
                return
            elif not saw_boundary_layer and not fine_tuning:
                continue
            else:
                yield param

    # Use a separate (smaller) learning rate for the fine-tuned layers
    if args.pretrained or args.fine_tune:
        optimizer_parameters = [{'params': gen_params(args.fine_tuning_boundary, fine_tuning=True),
                                 'lr': args.fine_tuning_lr},
                                {'params': gen_params(args.fine_tuning_boundary, fine_tuning=False)}]
    else:
        optimizer_parameters = [{'params': gen_params(args.fine_tuning_boundary, fine_tuning=False)}]

    # Debugging info
    util.print_err('Number of fine-tuning layers: {}'
                   .format(sum(1 for _ in gen_params(args.fine_tuning_boundary, fine_tuning=True))))
    util.print_err('Number of regular layers: {}'
                   .format(sum(1 for _ in gen_params(args.fine_tuning_boundary, fine_tuning=False))))

    return optimizer_parameters
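# Hypothetical usage sketch (not part of the original module): the returned
# parameter groups can be handed straight to a PyTorch optimizer, so the
# fine-tuned (early) layers train at args.fine_tuning_lr while the remaining
# layers use the optimizer's default learning rate. `model` and `args` are
# assumed to be set up as above.
import torch.optim as optim

optimizer = optim.Adam(get_parameters(model, args), lr=1e-3)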
def img_quality_ht(img_file_path):
    with open(img_file_path) as f:
        it = InnerToken()
        token = it.generateToken(**conf.apiInfo_2)
        req_json = {
            'image': base64.b64encode(f.read()),
            'access_token': token,
        }
    url = conf.api['img_quality_ht']
    quality = None
    res_json = None  # so the except branch can print it on the first failure
    for i in range(1, 50):
        try:
            res_json = util.http_post(url, req_json)
            quality = res_json['result']
            log_id = res_json['log_id']
            break
        except Exception as e:
            util.print_err(e)
            util.print_err(res_json)
            time.sleep(1)
    return quality
def super_resolution_ht(img_file_path, option='super_resolution'):
    with open(img_file_path) as f:
        it = InnerToken()
        token = it.generateToken(**conf.apiInfo)
        req_json = {
            'image': base64.b64encode(f.read()),
            'option': option,
            'access_token': token,
        }
    url = conf.api['super_resolution_ht']
    img = None
    res_json = None  # so the except branch can print it on the first failure
    for i in range(1, 50):
        try:
            res_json = util.http_post(url, req_json)
            img_str = base64.b64decode(res_json['result']['image'])
            nparr = np.frombuffer(img_str, np.uint8)  # np.fromstring is deprecated
            img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            break
        except Exception as e:
            util.print_err(e)
            util.print_err(res_json)
            time.sleep(1)
    return img
def run_classifier(args):
    """Run a trained XGBoost model and get metrics."""
    test_filepath, ext = os.path.splitext(args.test_path)
    test_dir = os.path.dirname(args.test_path)
    if ext == '.svmlight':
        test_inputs, test_labels = load_svmlight_file(args.test_path)
        test_inputs = test_inputs.todense()
    elif ext == '.npy':
        test_inputs = np.load(args.test_path)
        test_labels = np.load(os.path.join(test_dir, 'labels.npy'))
    else:
        test_inputs = pd.read_csv(args.test_path)
        test_labels = pd.read_csv(os.path.join(test_dir, 'labels.csv'))
    test_data = xgb.DMatrix(test_inputs, label=test_labels, missing=0.)

    model_path = os.path.join(args.model_dir, args.name + '.model')
    if not os.path.exists(model_path):
        raise IOError("Could not load model from path {}.".format(model_path))
    model = xgb.Booster()
    model.load_model(model_path)

    test_probs = model.predict(test_data)
    metrics = {'Accuracy': sk_metrics.accuracy_score(test_labels, test_probs > 0.5),
               'AUROC': sk_metrics.roc_auc_score(test_labels, test_probs),
               'AUPRC': sk_metrics.average_precision_score(test_labels, test_probs)}
    util.print_err('Performance of model at {}:'.format(model_path))
    for k, v in metrics.items():
        print('{} = {:.4f}'.format(k, v))

    test_probs_path = os.path.join(os.path.dirname(args.test_path), '{}_preds.npy'.format(args.name))
    util.print_err('Saving predictions to {}...'.format(test_probs_path))
    np.save(test_probs_path, test_probs)
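# Hypothetical usage sketch (not part of the original module): dump features in
# svmlight format so they match the '.svmlight' branch above. `features` and
# `labels` are illustrative arrays, not names from the original code.
from sklearn.datasets import dump_svmlight_file

dump_svmlight_file(features, labels, 'results/val.svmlight')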
def create_hdf5(series_list, output_dir, resample=False, max_series=1e5):
    hdf5_fh = h5py.File(os.path.join(output_dir, 'data.hdf5'), 'a')
    for group_name in ('series', 'aneurysm_masks'):
        if group_name not in hdf5_fh:
            hdf5_fh.create_group('/{}'.format(group_name))

    assert len(series_list) < 1e5, 'Too many series for 5-digit IDs.'
    for i, s in enumerate(series_list):
        if i >= max_series:
            break
        dset_path = '/series/{:05d}'.format(i + 1)
        if dset_path in hdf5_fh:
            continue
        print('Processing series {} from study {}...'.format(s.series_number, s.study_name))
        pixel_arrays = []
        is_valid_series = True
        for slice_name in tqdm(s.slice_names, total=len(s), unit=' slices'):
            # Process and write slices
            dcm_path = os.path.join(s.dcm_dir, slice_name + '.dcm')
            dcm = util.read_dicom(dcm_path)
            try:
                pixel_arrays.append(util.dcm_to_raw(dcm))
            except NotImplementedError:
                print('Unsupported image format, not converting study: {}'.format(s.study_name))
                is_valid_series = False
                break
        if not is_valid_series:
            continue

        volume = np.stack(pixel_arrays)
        aneurysm_mask_path = os.path.join(s.dcm_dir, 'aneurysm_mask.npy')
        if os.path.exists(aneurysm_mask_path):
            s.aneurysm_mask_path = aneurysm_mask_path
            aneurysm_mask = np.transpose(np.load(s.aneurysm_mask_path), [2, 0, 1])
        else:
            s.aneurysm_mask_path = None
            aneurysm_mask = None
        assert aneurysm_mask is None or aneurysm_mask.shape == volume.shape, \
            'Mismatched aneurysm mask and volume shapes: {} and {}'.format(aneurysm_mask.shape,
                                                                           volume.shape)

        if len(s) > 0 and resample:
            util.print_err('Resampling volume... Shape before: {}'.format(volume.shape))
            tick = time.time()
            dcm = util.read_dicom(os.path.join(s.dcm_dir, s.slice_names[0] + '.dcm'))
            volume, real_scale = util.resample(volume, dcm.SliceThickness,
                                               dcm.PixelSpacing, (1.5, 1., 1.))
            util.print_err('Shape after: {}. Resample took {} s.'.format(volume.shape,
                                                                         time.time() - tick))
            if aneurysm_mask is not None:
                util.print_err('Resampling mask... Shape before: {}, count before: {}.'
                               .format(aneurysm_mask.shape, np.sum(aneurysm_mask > 0)))
                tick = time.time()
                aneurysm_mask, mask_scale = util.resample(aneurysm_mask, dcm.SliceThickness,
                                                          dcm.PixelSpacing, (1.5, 1., 1.))
                util.print_err('Mask shape after: {}, count after: {}. Resample took {} s.'
                               .format(aneurysm_mask.shape, np.sum(aneurysm_mask > 0),
                                       time.time() - tick))
                if not aneurysm_mask.any():
                    raise RuntimeError('Mask has zero volume after resampling.')
                if s.is_aneurysm:
                    # Recompute slice numbers where the aneurysm lives
                    s.aneurysm_bounds = get_aneurysm_range(aneurysm_mask)
                    s.aneurysm_ranges = [s.aneurysm_bounds]
                    s.absolute_range = [0, aneurysm_mask.shape[0]]

        # Create one dataset for the volume (int16), one for the mask (bool)
        s.dset_path = dset_path
        hdf5_fh.create_dataset(s.dset_path, data=volume, dtype='i2', chunks=True)
        if aneurysm_mask is not None:
            s.aneurysm_mask_path = '/aneurysm_masks/{:05d}'.format(i + 1)
            hdf5_fh.create_dataset(s.aneurysm_mask_path, data=aneurysm_mask,
                                   dtype='?', chunks=True)

    # Print summary
    util.print_err('Series: {}'.format(len(hdf5_fh['/series'])))
    util.print_err('Aneurysm Masks: {}'.format(len(hdf5_fh['/aneurysm_masks'])))

    # Dump pickle and JSON (updated dset_path and mask_path attributes)
    util.print_err('Dumping pickle file...')
    with open(os.path.join(output_dir, 'series_list.pkl'), 'wb') as pkl_fh:
        pickle.dump(series_list, pkl_fh)
    util.print_err('Dumping JSON file...')
    with open(os.path.join(output_dir, 'series_list.json'), 'w') as json_file:
        json.dump([dict(series) for series in series_list],
                  json_file, indent=4, sort_keys=True, default=util.json_encoder)

    # Clean up
    hdf5_fh.close()
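# Hypothetical read-back sketch (not part of the original module): volumes are
# written as int16 datasets under '/series/NNNNN' and masks as boolean datasets
# under '/aneurysm_masks/NNNNN', so a converted directory can be inspected like
# this ('00001' and output_dir are illustrative).
with h5py.File(os.path.join(output_dir, 'data.hdf5'), 'r') as fh:
    volume = fh['/series/00001'][:]                # (slices, H, W) int16 array
    if '/aneurysm_masks/00001' in fh:
        mask = fh['/aneurysm_masks/00001'][:]      # same shape, boolean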
def test(args):
    print("Stage 1")
    model, ckpt_info = ModelSaver.load_model(args.ckpt_path, args.gpu_ids)
    print("Stage 2")
    args.start_epoch = ckpt_info['epoch'] + 1
    model = model.to(args.device)
    print("Stage 3")
    model.eval()
    print('This should be false: {}'.format(model.training))
    print("Stage 4")
    data_loader = CTDataLoader(args, phase=args.phase, is_training=False)

    study2slices = defaultdict(list)
    study2probs = defaultdict(list)
    study2labels = {}
    logger = TestLogger(args, len(data_loader.dataset), data_loader.dataset.pixel_dict)
    print("Stage 5")

    # Build a study_num -> label dict from the series list
    with open('/projectnb/ece601/kaggle-pulmonary-embolism/meganmp/train/series_list.pkl', 'rb') as f:
        data_labels = pickle.load(f)
    label_dict = {x.study_num: x.is_positive for x in data_labels}
    for key in label_dict:
        print('label_dict={}\t{}'.format(key, label_dict[key]))

    # Get model outputs, log to TensorBoard, write masks to disk window-by-window
    util.print_err('Writing model outputs to {}...'.format(args.results_dir))
    with tqdm(total=len(data_loader.dataset), unit=' windows') as progress_bar:
        for i, (inputs, targets_dict) in enumerate(data_loader):
            with torch.no_grad():
                cls_logits = model.forward(inputs.to(args.device))
                cls_probs = torch.sigmoid(cls_logits)
            if args.visualize_all:
                logger.visualize(inputs, cls_logits, targets_dict=None,
                                 phase=args.phase, unique_id=i)

            max_probs = cls_probs.to('cpu').numpy()
            for study_num, slice_idx, prob in \
                    zip(targets_dict['study_num'], targets_dict['slice_idx'], list(max_probs)):
                # Convert to standard python data types
                slice_idx = int(slice_idx)

                # Save series num for aggregation
                study2slices[study_num].append(slice_idx)
                study2probs[study_num].append(prob.item())

                if study_num not in study2labels:
                    print('study_num={}'.format(study_num))
                    print('series.is_positive={}'.format(label_dict[study_num]))
                    study2labels[study_num] = label_dict[study_num]
            progress_bar.update(inputs.size(0))

    print('study2labels={}'.format(study2labels))

    # Combine masks
    util.print_err('Combining masks...')
    max_probs = []
    labels = []
    predictions = {}
    print("Get max prob")
    for study_num in tqdm(study2slices):
        # Sort by slice index and get max probability
        slice_list, prob_list = (list(t) for t in zip(*sorted(
            zip(study2slices[study_num], study2probs[study_num]),
            key=lambda slice_and_prob: slice_and_prob[0])))
        study2slices[study_num] = slice_list
        study2probs[study_num] = prob_list
        max_prob = max(prob_list)
        print('study={}\tmax_prob={}'.format(study_num, max_prob))
        max_probs.append(max_prob)
        label = study2labels[study_num]
        labels.append(label)
        predictions[study_num] = {'label': label, 'pred': max_prob}

    # Save predictions to file, indexed by study number
    print("Saving predictions to pickle files")
    with open('{}/preds.pickle'.format(args.results_dir), "wb") as fp:
        pickle.dump(predictions, fp)

    results_series = [k for k, _ in predictions.items()]
    results_pred = [v['pred'] for _, v in predictions.items()]
    results_label = [v['label'] for _, v in predictions.items()]
    print('roc_auc_score={}'.format(roc_auc_score(results_label, results_pred)))

    # Create dataframe summary
    TRAIN_CSV = '/projectnb/ece601/kaggle-pulmonary-embolism/rsna-str-pulmonary-embolism-detection/train.csv'
    train_df = pd.read_csv(TRAIN_CSV)
    train_df = train_df[['SeriesInstanceUID', 'negative_exam_for_pe']]
    train_df = train_df.groupby('SeriesInstanceUID').aggregate(list)
    train_df['pe_label'] = train_df['negative_exam_for_pe'].apply(lambda x: 0 if 1 in x else 1)

    results_dict = {'series': results_series, 'pred': results_pred}
    results_df = pd.DataFrame.from_dict(results_dict)
    results_df = results_df.set_index('series')
    results_df = results_df.join(train_df, how='left').reset_index().rename({'index': 'series'})
    print('roc_auc_score={}'.format(roc_auc_score(results_df['pe_label'], results_df['pred'])))

    # Calculate confusion matrix
    results_df['interpretation'] = results_df['pred'].apply(lambda x: 0 if x < 0.5 else 1)
    print(results_df.head(10))
    tn, fp, fn, tp = confusion_matrix(results_df['pe_label'], results_df['interpretation']).ravel()
    print('confusion_matrix: [{} {} {} {}]'.format(tp, fp, fn, tn))
def test(args):
    print("Stage 1")
    model, ckpt_info = ModelSaver.load_model(args.ckpt_path, args.gpu_ids)
    print("Stage 2")
    args.start_epoch = ckpt_info['epoch'] + 1
    model = model.to(args.device)
    print("Stage 3")
    model.eval()
    print("Stage 4")
    data_loader = CTDataLoader(args, phase=args.phase, is_training=False)
    print(data_loader.dataset.ctpe_list)

    study2slices = defaultdict(list)
    study2probs = defaultdict(list)
    study2labels = {}
    logger = TestLogger(args, len(data_loader.dataset), data_loader.dataset.pixel_dict)

    # Get model outputs, log to TensorBoard, write masks to disk window-by-window
    util.print_err('Writing model outputs to {}...'.format(args.results_dir))
    with tqdm(total=len(data_loader.dataset), unit=' windows') as progress_bar:
        for i, (inputs, targets_dict) in enumerate(data_loader):
            with torch.no_grad():
                cls_logits = model.forward(inputs.to(args.device))
                cls_probs = torch.sigmoid(cls_logits)  # F.sigmoid is deprecated
            if args.visualize_all:
                logger.visualize(inputs, cls_logits, targets_dict=None,
                                 phase=args.phase, unique_id=i)

            max_probs = cls_probs.to('cpu').numpy()
            for study_num, slice_idx, prob in \
                    zip(targets_dict['study_num'], targets_dict['slice_idx'], list(max_probs)):
                # Convert to standard python data types
                study_num = int(study_num)
                slice_idx = int(slice_idx)

                # Save series num for aggregation
                study2slices[study_num].append(slice_idx)
                study2probs[study_num].append(prob.item())

                series = data_loader.get_series(study_num)
                if study_num not in study2labels:
                    study2labels[study_num] = int(series.is_positive)
            progress_bar.update(inputs.size(0))

    # Combine masks
    util.print_err('Combining masks...')
    max_probs = []
    labels = []
    study_nums = []
    predictions = {}
    print("Get max prob")
    for study_num in tqdm(study2slices):
        # Sort by slice index and get max probability
        slice_list, prob_list = (list(t) for t in zip(*sorted(
            zip(study2slices[study_num], study2probs[study_num]),
            key=lambda slice_and_prob: slice_and_prob[0])))
        study2slices[study_num] = slice_list
        study2probs[study_num] = prob_list
        max_prob = max(prob_list)
        max_probs.append(max_prob)
        label = study2labels[study_num]
        labels.append(label)
        study_nums.append(study_num)
        predictions[study_num] = {'label': label, 'pred': max_prob}

    # Save predictions to file, indexed by study number
    print("Saving predictions to pickle files")
    with open('{}/preds.pickle'.format(args.results_dir), "wb") as fp:
        pickle.dump(predictions, fp)

    # Compute AUROC and AUPRC using max aggregation, write to files
    max_probs, labels = np.array(max_probs), np.array(labels)

    # Optimal ROC threshold (Youden-style); computed but overridden by 0.5 below
    fpr, tpr, threshold = roc_curve(labels, max_probs)
    i = np.arange(len(tpr))
    roc = pd.DataFrame({'tf': pd.Series(tpr - (1 - fpr), index=i),
                        'threshold': pd.Series(threshold, index=i)})
    roc_t = roc.iloc[(roc.tf - 0).abs().argsort()[:1]]  # .ix is removed in modern pandas

    threshold = 0.5
    pred = [1 if p > threshold else 0 for p in max_probs]
    tn, fp, fn, tp = confusion_matrix(labels, pred).ravel()

    print("\nTrue Positive Examples\n" + "-" * 80)
    for i, study_num in enumerate(study_nums):
        if labels[i] == 1 and pred[i] == 1:
            print(study_num)
    print("\nTrue Negative Examples\n" + "-" * 80)
    for i, study_num in enumerate(study_nums):
        if labels[i] == 0 and pred[i] == 0:
            print(study_num)
    print("\nFalse Negative Examples\n" + "-" * 80)
    for i, study_num in enumerate(study_nums):
        if labels[i] == 1 and pred[i] == 0:
            print(study_num)
    print("\nFalse Positive Examples\n" + "-" * 80)
    for i, study_num in enumerate(study_nums):
        if labels[i] == 0 and pred[i] == 1:
            print(study_num)

    print("Total number of data :", len(labels))
    print("Total number of positives :", len([l for l in labels if l == 1]))
    print("Total number of negatives :", len([l for l in labels if l == 0]))
    print("# True Negative : ", tn)
    print("# True Positive : ", tp)
    print("# False Negative : ", fn)
    print("# False Positive : ", fp)

    metrics = {
        args.phase + '_' + 'AUPRC': sk_metrics.average_precision_score(labels, max_probs),
        args.phase + '_' + 'AUROC': sk_metrics.roc_auc_score(labels, max_probs),
    }
    for k, v in metrics.items():
        print('{}: {:.5f}\n'.format(k, v))

    print("Saving metrics to file")
    with open(os.path.join(args.results_dir, 'metrics.txt'), 'w') as metrics_fh:
        for k, v in metrics.items():
            metrics_fh.write('{}: {:.5f}\n'.format(k, v))

    curves = {
        args.phase + '_' + 'PRC': sk_metrics.precision_recall_curve(labels, max_probs),
        args.phase + '_' + 'ROC': sk_metrics.roc_curve(labels, max_probs)
    }
    roc = sk_metrics.roc_curve(labels, max_probs)
    with open("intermountain_roc.pkl", 'wb') as f:
        pickle.dump(roc, f)
    for name, curve in curves.items():
        curve_np = util.get_plot(name, curve)
        curve_img = Image.fromarray(curve_np)
        curve_img.save(os.path.join(args.results_dir, '{}.png'.format(name)))
def bml_map(line, download_dir_name, merged_dir_name):
    fs = line.strip().split('\t')
    pics = fs[0:3]
    title = fs[4]
    brand = fs[5]
    audio_url = fs[7]
    ideaid = fs[-1]
    m2 = hashlib.md5()
    output_dir = download_dir_name
    url_names = []
    pic_file_names = []
    pic_local_paths = []
    q_all = []
    ready_pics = 0
    for index, p in enumerate(pics):
        m2.update(p + 'sunfuhao')
        file_name = m2.hexdigest()
        fn_suf = file_name + '.jpg'
        output_path = os.path.join(output_dir, fn_suf)
        url_names.append(p)
        pic_file_names.append(fn_suf)
        pic_local_paths.append(output_path)

        # Skip images that were already downloaded
        if os.path.exists(output_path):
            util.print_err("duplicated image %s" % file_name)
            ready_pics += 1
            continue

        # Download with retries, toggling the proxy after each failure
        img_data = None
        use_proxy = False
        for i in range(50):
            try:
                img_data = util.http_get(p, use_proxy)
                util.print_err("%s download succeeded" % p)
                break
            except Exception as e:
                util.print_err("%s %s" % (e, p))
                use_proxy = not use_proxy
                util.print_err("toggling proxy")
                time.sleep(1)

        if img_data is not None and len(img_data) > 1000:
            # Reject images whose clarity score is too low
            for i in range(30):
                try:
                    q0 = cvtools.req_clarity_bvc(img_data)
                    if q0 < 0.3:
                        return None
                    q_all.append(q0)
                    break
                except Exception:
                    util.print_err("fail_clarity")
                    time.sleep(2)
            with open(output_path, 'w') as fn:
                fn.write(img_data)
            img1 = cv2.imread(output_path)
            img1 = cvtools.img_resize(img1, (370, 245))
            cv2.imwrite(output_path, img1)
            ready_pics += 1
        else:
            util.print_err("%s download failed!!!" % p)

    if ready_pics != 3:
        util.print_err("not enough images: %s ready" % ready_pics)
        return

    # Copy the downloaded images into the merged directory
    img_name1, img_name2, img_name3 = pic_file_names
    res_dir_chaofen = merged_dir_name
    fn_path1 = os.path.join(res_dir_chaofen, img_name1)
    fn_path2 = os.path.join(res_dir_chaofen, img_name2)
    fn_path3 = os.path.join(res_dir_chaofen, img_name3)
    for src_path, dst_path in zip(pic_local_paths, (fn_path1, fn_path2, fn_path3)):
        cv2.imwrite(dst_path, cv2.imread(src_path))

    # Super-resolve each image, pad it to 867 px wide with a Gaussian-blur
    # border, then re-encode it as base64
    base_strs = []
    for fn_path in (fn_path1, fn_path2, fn_path3):
        c0 = cvtools.super_resolution(fn_path, svr_url, is_local=False)
        h, w, d = c0.shape[:3]
        target_sz = (h, 867, d)
        c2_p = cvtools.img_padding(c0, target_sz, dir='h', method='gblur')
        cv2.imwrite(fn_path, c2_p)
        with open(fn_path) as f:
            base_strs.append(base64.b64encode(f.read()))
    base1, base2, base3 = base_strs

    # Template is currently pinned to "99"
    templtelist = ['99', '98', '97', '96']
    templte = random.choice(templtelist)
    templte = "99"
    prjson = ('{"video_key":"%s","company":"%s","audio":["%s"],'
              '"pic_and_desc":[{"pic_binary":"%s","desc":"%s"},'
              '{"pic_binary":"%s","desc":"%s"},{"pic_binary":"%s","desc":"%s"}],'
              '"trade":[{"trade_id_1st":"%s","trade_name_1st":"feed"}],'
              '"ad_info":{"userid":"%s","planid":"123","unitid":"123","winfoid":"123"},'
              '"other_info":{"lp_url":""}}'
              % (ideaid, title, audio_url, base1, title, base2, brand, base3, title, templte, ideaid))
    print(prjson)
if __name__ == "__main__":
    download_dir_name = sys.argv[1]
    merged_dir_name = sys.argv[2]
    util.print_err("%s %s" % (download_dir_name, merged_dir_name))
    svr_url = [cvtools.get_server_addr()]
    for line in sys.stdin:
        bml_map(line, download_dir_name, merged_dir_name)
def test(args):
    print("Stage 1")
    model, ckpt_info = ModelSaver.load_model(args.ckpt_path, args.gpu_ids)
    print("Stage 2")
    args.start_epoch = ckpt_info['epoch'] + 1
    model = model.to(args.device)
    print("Stage 3")
    model.eval()
    print("Stage 4")
    data_loader = CTDataLoader(args, phase=args.phase, is_training=False)

    study2slices = defaultdict(list)
    study2probs = defaultdict(list)
    study2labels = {}
    logger = TestLogger(args, len(data_loader.dataset), data_loader.dataset.pixel_dict)
    means = []

    # Get model outputs, log to TensorBoard, write masks to disk window-by-window
    util.print_err('Writing model outputs to {}...'.format(args.results_dir))
    with tqdm(total=len(data_loader.dataset), unit=' windows') as progress_bar:
        for i, (inputs, targets_dict) in enumerate(data_loader):
            means.append(inputs.mean().item())  # .data[0] indexing is long deprecated
            with torch.no_grad():
                cls_logits = model.forward(inputs.to(args.device))
                cls_probs = torch.sigmoid(cls_logits)  # F.sigmoid is deprecated
            if args.visualize_all:
                logger.visualize(inputs, cls_logits, targets_dict=None,
                                 phase=args.phase, unique_id=i)

            max_probs = cls_probs.to('cpu').numpy()
            for study_num, slice_idx, prob in \
                    zip(targets_dict['study_num'], targets_dict['slice_idx'], list(max_probs)):
                # Convert to standard python data types
                study_num = int(study_num)
                slice_idx = int(slice_idx)

                # Save series num for aggregation
                study2slices[study_num].append(slice_idx)
                study2probs[study_num].append(prob.item())

                series = data_loader.get_series(study_num)
                if study_num not in study2labels:
                    study2labels[study_num] = int(series.is_positive)
            progress_bar.update(inputs.size(0))

    # Combine masks
    util.print_err('Combining masks...')
    max_probs = []
    labels = []
    predictions = {}
    print("Get max probability")
    for study_num in tqdm(study2slices):
        # Sort by slice index and get max probability
        slice_list, prob_list = (list(t) for t in zip(*sorted(
            zip(study2slices[study_num], study2probs[study_num]),
            key=lambda slice_and_prob: slice_and_prob[0])))
        study2slices[study_num] = slice_list
        study2probs[study_num] = prob_list
        max_prob = max(prob_list)
        max_probs.append(max_prob)
        label = study2labels[study_num]
        labels.append(label)
        predictions[study_num] = {'label': label, 'pred': max_prob}

    # Save predictions to file, indexed by study number
    print("Save to pickle")
    with open('{}/preds.pickle'.format(args.results_dir), "wb") as fp:
        pickle.dump(predictions, fp)

    # Write features for XGBoost
    save_for_xgb(args.results_dir, study2probs, study2labels)
    # Write the slice indices used for the features
    print("Write slice indices")
    with open(os.path.join(args.results_dir, 'xgb', 'series2slices.json'), 'w') as json_fh:
        json.dump(study2slices, json_fh, sort_keys=True, indent=4)

    # Compute AUROC and AUPRC using max aggregation, write to files
    max_probs, labels = np.array(max_probs), np.array(labels)
    metrics = {
        args.phase + '_' + 'AUPRC': sk_metrics.average_precision_score(labels, max_probs),
        args.phase + '_' + 'AUROC': sk_metrics.roc_auc_score(labels, max_probs),
    }
    print("Write metrics")
    with open(os.path.join(args.results_dir, 'metrics.txt'), 'w') as metrics_fh:
        for k, v in metrics.items():
            metrics_fh.write('{}: {:.5f}\n'.format(k, v))

    curves = {
        args.phase + '_' + 'PRC': sk_metrics.precision_recall_curve(labels, max_probs),
        args.phase + '_' + 'ROC': sk_metrics.roc_curve(labels, max_probs)
    }
    for name, curve in curves.items():
        curve_np = util.get_plot(name, curve)
        curve_img = Image.fromarray(curve_np)
        curve_img.save(os.path.join(args.results_dir, '{}.png'.format(name)))
def print_result(self, results):
    deflection = results[-1] / (self.load_case.wing.wing_box.end_y * 2) * 100
    print("Maximum deflection: {0:.2f} [%]".format(deflection))
    if deflection > self.load_case.limit_deflection:
        util.print_err("Wing box failed: deflection exceeded limits")
def run_models(args, csv_cols, models, task_sequence):
    """Run models and save predicted probabilities to disk.

    Args:
        args: Command-line arguments.
        csv_cols: List of column headers for the CSV files (should be 'Path' + all pathologies).
        models: List of (ckpt_path, is_3class) tuples to use for the models.
        task_sequence: List of tasks to predict for each model.
    """
    data_args = args.data_args
    logger_args = args.logger_args
    model_args = args.model_args
    transform_args = args.transform_args

    # Get eval loader
    data_loader = get_loader(data_args,
                             transform_args,
                             data_args.split,
                             TASK_SEQUENCES[data_args.task_sequence],
                             su_frac=1,
                             nih_frac=0,
                             batch_size=args.batch_size,
                             is_training=False,
                             shuffle=False,
                             study_level=True,
                             return_info_dict=True)

    num_models_finished = 0
    for ckpt_path, is_3class in models:
        output_dir = os.path.join(logger_args.results_dir, data_args.split)
        output_path = os.path.join(output_dir,
                                   '{}.csv'.format(get_checkpoint_identifier(ckpt_path)))
        if os.path.exists(output_path):
            print(f"Single model probabilities already written to {output_path}")
            continue
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        # Make an empty file so parallel processes run different models
        # open(output_path, 'w').close()

        util.print_err('Running model {} / {} from path {}'.format(
            num_models_finished + 1, len(models), ckpt_path))

        # Get model
        model_args.model_uncertainty = is_3class
        model, ckpt_info = ModelSaver.load_model(ckpt_path, args.gpu_ids, model_args, data_args)
        model = model.to(args.device)
        model.eval()

        # Get task sequence predicted by the model
        model_task_sequence = model.module.task_sequence
        if model_task_sequence != task_sequence:
            error_msgs = ['Mismatched task sequences:',
                          'Checkpoint: {}'.format(model_task_sequence),
                          'Args: {}'.format(TASK_SEQUENCES[data_args.task_sequence])]
            raise ValueError('\n  '.join(error_msgs))

        # Sample from the data loader and record model outputs
        csv_rows = []
        num_examples = len(data_loader.dataset)
        with tqdm(total=num_examples,
                  unit=' ' + data_args.split + ' ' + data_args.dataset_name) as progress_bar:
            for inputs, targets, info_dict, mask in data_loader:
                with torch.no_grad():
                    # For Stanford, evaluate on studies
                    if data_args.dataset_name == 'stanford':
                        # Fuse batch size `b` and study length `s`
                        b, s, c, h, w = inputs.size()
                        inputs = inputs.view(-1, c, h, w)

                        # Predict
                        logits = model.forward(inputs.to(args.device))
                        logits = logits.view(b, s, -1)

                        # Mask padding to negative infinity
                        ignore_where = (mask == 0).unsqueeze(-1) \
                                                  .repeat(1, 1, logits.size(-1)) \
                                                  .to(args.device)
                        logits = torch.where(ignore_where,
                                             torch.full_like(logits, NEG_INF), logits)
                        logits, _ = torch.max(logits, 1)
                    elif data_args.dataset_name == 'nih':
                        logits = model.forward(inputs.to(args.device))
                    else:
                        raise ValueError('Invalid dataset name: {}'.format(data_args.dataset_name))

                    # Save study path and probabilities for each example
                    if is_3class:
                        batch_probs = util.uncertain_logits_to_probs(logits)
                    else:
                        batch_probs = torch.sigmoid(logits)
                    study_paths = info_dict['paths']
                    for probs, path in zip(batch_probs, study_paths):
                        csv_row = {COL_PATH: path, 'DataSplit': data_args.split}
                        csv_row.update({task: prob.item()
                                        for task, prob in zip(model_task_sequence, probs)})
                        csv_rows.append(csv_row)

                progress_bar.update(targets.size(0))

        # Write CSV file to disk
        df = pd.DataFrame(csv_rows)
        print('Saving single-model probabilities to: {}'.format(output_path))
        df[csv_cols].to_csv(output_path, index=False)
        num_models_finished += 1
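# Hypothetical usage sketch (not part of the original module): each entry in
# `models` pairs a checkpoint path with whether that checkpoint was trained
# with 3-class (uncertainty) outputs. The checkpoint paths are illustrative
# only; `args`, `csv_cols`, and `task_sequence` are assumed to be built as above.
models = [('ckpts/model_a/best.pth.tar', False),
          ('ckpts/model_b_uncertainty/best.pth.tar', True)]
run_models(args, csv_cols, models, task_sequence)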
def print_result(self, results):
    twist = results[-1] * 180 / sp.pi
    print("Maximum twist: {0:.2f} [deg]".format(twist))
    if twist > self.load_case.limit_twist:
        util.print_err("Wing box failed: twist exceeded limits")