Example #1
def merge(pkl_paths, output_dir):
    if os.path.exists(output_dir):
        raise ValueError(
            'Output directory {} already exists. Converted dirs should be combined manually.'
            .format(output_dir))
    os.makedirs(output_dir)

    # Collect studies from annotation file
    all_series = []
    for pkl_path in pkl_paths:
        with open(pkl_path, 'rb') as pkl_file:
            add_series = pickle.load(pkl_file)
            all_series += add_series

    # Check for duplicate series
    unique_series = {(s.study_name, s.series_number) for s in all_series}
    if len(unique_series) < len(all_series):
        raise RuntimeError('Found duplicate series in directories {}.'.format(
            ', '.join(pkl_paths)))

    # Write summary file for all series
    util.print_err('Dumping pickle file...')
    with open(os.path.join(output_dir, 'series_list.pkl'), 'wb') as pkl_file:
        pickle.dump(all_series, pkl_file)
    util.print_err('Dumping JSON file...')
    with open(os.path.join(output_dir, 'series_list.json'), 'w') as json_file:
        json.dump([dict(series) for series in all_series],
                  json_file,
                  indent=4,
                  sort_keys=True,
                  default=util.json_encoder)
Example #2
def restructure_directory(input_dir, output_dir, json_path):
    with open(json_path, 'r') as json_file:
        dcm_dict = json.load(json_file)

    for acc, series_dict in tqdm(dcm_dict.items()):
        folder = os.path.join(input_dir, acc)
        subfolder = [s for s in os.listdir(folder) if s.startswith('ST')]
        assert len(subfolder) == 1, "Expected exactly one 'ST' subfolder in {}.".format(acc)
        subfolder = subfolder[0]

        if len(series_dict) > 1:
            continue

        try:
            for series_acq, inst_num in series_dict.items():

                dcm = util.read_dicom(os.path.join(folder, subfolder, series_dict[series_acq]['1'][0]))
                description = dcm.SeriesDescription.replace('/', ' ')
                Path(os.path.join(output_dir, acc, description)).mkdir(parents=True, exist_ok=True)

                series_num = series_acq.split('_')[0]
                if len(inst_num) < 10:
                    continue
                    
                for i in range(1, len(inst_num) + 1):
                    source_path = os.path.join(folder, subfolder, inst_num[str(i)][0])
                    dest_path = os.path.join(output_dir, acc, description, inst_num[str(i)][0])
                    shutil.copy(source_path, dest_path)
                    os.rename(dest_path,
                              os.path.join(output_dir, acc, description,
                                           'IM-' + series_num.zfill(4) + '-' + str(i).zfill(4) + '.dcm'))
        except Exception:
            util.print_err('Error occurred while copying {}. Skipping...'.format(acc))
            continue
Example #3
 def print_result(self, results):
     abs_min = abs(min(results))
     abs_max = abs(max(results))
     max_value = max(results) if abs_max > abs_min else min(results)
     pos = results.index(max_value) * self.load_case.step
     print("Maximum stress in bottom panel: {0:.3e} [Pa] at {1:.2f} [m]".
           format(max_value, pos))
     if max_value > self.wing_box.material.yield_stress:
         util.print_err(
             "Wing box failed: bottom panel stress exceeded yield stress.")
Example #4
def req_clarity_bvc(img_data):
    #with open(img_file_path) as f:
    request_pb = general_classify_client.GeneralClassifyRequest()
    request_pb.image = img_data
    classify_type = request_pb.classify_type.add()
    classify_type.type_name = 'clarity'
    classify_type.topnum = 1
    request_str = request_pb.SerializePartialToString()

    logid = random.randint(1000000, 100000000)
    #requestinfo = {
    #        'image': base64.b64encode(f.read()),
    #        }
    #data = json.dumps(requestinfo)

    req_array = {
        'appid': '123456',
        'logid': logid,
        'format': 'json',
        'from': 'test-python',
        'cmdid': '123',
        'clientip': '0.0.0.0',
        'data': base64.b64encode(request_str),
    }
    req_json = json.dumps(req_array)

    url = conf.api['req_clarity_bvc']
    url = random.choice([
        'http://10.156.86.15:8134/GeneralClassifyService/classify',
        'http://10.156.86.15:8135/GeneralClassifyService/classify'
    ])
    req = urllib2.Request(url)
    req.add_header('Content-Type', 'application/json')
    response = None
    res = None
    for i in range(1, 50):
        try:
            response = urllib2.urlopen(req, req_json, 1)
            res_str_tmp = response.read()
            json_res = json.loads(res_str_tmp)
            if "err_no" not in json_res:
                return "no err_no"

            if json_res["err_no"] != 0:
                return "err_no is not 0\t" + res_str_tmp
            res_pb = general_classify_client.GeneralClassifyResponse()

            res_pb.ParseFromString(base64.b64decode(json_res['result']))
            for result in res_pb.result:
                if result.type_name == "clarity":
                    res = (result.probability[0] + 6) / 12
            break
        except Exception as e:
            util.print_err(e)
            time.sleep(1)
    return res
Example #5
 def print_result(self, min_margin):
     print("")
     print("Results for skin buckling")
     failure = False
     for section in self.wing_box.sections:
         if min_margin[section][0] < 1: failure = True
         print(
             "Wing box section range: {0:.2f}, {1:.2f} [m]; Lowest margin of safety: {2:.2f} on plate with width {3:.2f} [m]"
             .format(section.start_y, section.end_y, min_margin[section][0],
                     min_margin[section][1].width))
     if failure: util.print_err("Wing box failed due to skin buckling")
Example #6
 def print_result(self, min_margin):
     print("")
     print("Results for shear buckling")
     failure = False
     for section in self.wing_box.sections:
         if min_margin[section][0] < 1: failure = True
         print(
             "Wing box section range: {0:.2f}, {1:.2f} [m]; Lowest margin of safety: {2:.2f} on {3}"
             .format(
                 section.start_y, section.end_y, min_margin[section][0],
                 "front spar" if min_margin[section][1] else "back spar"))
     if failure: util.print_err("Wing box failed due to shear buckling")
Example #7
    def __init__(self, config: Dict[str, str]):
        if 'detector_model' not in config:
            print_err('Error: "detector_model" not found in config')
            raise ValueError
        self.detector = Detector(config['detector_model'])

        self.registry = None
        if 'chrome_web_driver' in config:
            self.registry = RegistryRequester(
                selenium_driver_path=config['chrome_web_driver'])
        else:
            self.registry = RegistryRequester()
Example #8
def super_resolution_bvc(img_file_path, url, option='super_resolution'):
    with open(img_file_path) as f:
        logid = random.randint(1000000, 100000000)
        requestinfo = {
            'image': base64.b64encode(f.read()),
            'type_name': 'image_restoration',
            'option': option,
        }
        data = json.dumps(requestinfo)

        req_array = {
            'jsonrpc':
            '2.0',
            'method':
            'classify',
            'id':
            '123',
            'params': [{
                'appid': '123456',
                'logid': logid,
                'format': 'json',
                'from': 'test-python',
                'cmdid': '123',
                'clientip': '0.0.0.0',
                'data': base64.b64encode(data),
            }]
        }
        req_json = json.dumps(req_array)
        res = None
        res_json = None
        img = None

        for i in range(1, 50):
            try:
                req = urllib2.Request(url[0])
                req.add_header('Content-Type', 'application/json')
                response = urllib2.urlopen(req, req_json, 10000)
                res = json.loads(response.read())
                res_json = json.loads(
                    base64.b64decode(res['result']['_ret']['result']))
                img_str = base64.b64decode(res_json['image'])
                nparr = np.fromstring(img_str, np.uint8)
                img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
                break
            except Exception as e:
                util.print_err(e)
                util.print_err("bvc req failed. change server")
                util.print_err(res)
                util.print_err(res_json)
                time.sleep(5)
                url[0] = get_server_addr()
                util.print_err(url)
        return img
Example #9
 def print_result(self, min_margin):
     print("")
     print("Results for column buckling")
     failure = False
     for section in self.wing_box.sections:
         if min_margin[section][0] < 1: failure = True
         print(
             "Wing box section range: {0:.2f}, {1:.2f} [m]; Lowest margin of safety: {2:.2f} on {3} set with size {4}, {5} [m]"
             .format(section.start_y, section.end_y, min_margin[section][0],
                     min_margin[section][1].stringer_type.name,
                     min_margin[section][1].stringer_width,
                     min_margin[section][1].stringer_height))
     if failure: util.print_err("Wing box failed due to column buckling")
Example #10
File: main.py  Project: zdcspace/fuchsia
def transitions_from_args(args) -> Dict[Binding, Transition]:
    transitions_by_binding = {}
    for binding in Binding:
        transition = getattr(args, binding.value)
        if transition is None:
            continue
        if transition not in transitions:
            print_err(
                f'error: undefined transition {transition} for binding {binding.value}'
            )
            return
        transitions_by_binding[binding] = transitions[transition]
    return transitions_by_binding
Example #11
def get_server_addr():
    proxy_url = conf.api['bvc_proxy']
    for i in range(10):
        try:
            res_proxy = json.loads(util.http_get(proxy_url))
            server_json = random.choice(res_proxy['result']['ServerInfo'])
            addr = server_json['Server']['service_addr'][0]
            break
        except Exception as e:
            util.print_err(e)
            util.print_err("getting proxy url...")
            time.sleep(1)
    ip = addr.split(':')[0]
    port = 40077
    url = "http://%s:%s/1" % (ip, port)
    return url
Example #12
def run_test_setup(test_root: Path) -> (CompatTest, str):
    """
    Runs the user through prompts to get the minimum amount of information to
    return a new CompatTest.
    """
    title = input(
        'Enter a human readable title for your test (e.g. "Add a protocol method"): '
    )

    # initialize FIDL file
    fidl_name = input(
        f'Enter name for initial {pink("FIDL")} file (e.g. "before.test.fidl"): '
    )
    fidl_name = prepend_step(fidl_name, step=0)
    fidl_library_name = test_name_to_fidl_name(test_root.name)
    scaffolding.initialize_fidl(test_root / FIDL_DIR / fidl_name,
                                fidl_library_name)
    fidl_ref: FidlRef = stem(fidl_name)

    # initialize bindings
    bindings = {}
    for binding in BINDINGS:
        filename = input(
            f'Enter name for initial {pink(binding)} file (e.g. "before.{EXTENSIONS[binding]}"), or leave empty to skip binding: '
        )
        if not filename:
            continue
        filename = prepend_step(filename, step=0)
        scaffolding.initialize_src(test_root / binding / filename, binding,
                                   fidl_library_name)
        bindings[binding] = Steps(starting_fidl=fidl_ref,
                                  starting_src=f'{binding}/{filename}',
                                  steps=[])
    if not bindings:
        print_err('Must include at least one binding to define a test')
        sys.exit(1)

    new_test = CompatTest(title=title,
                          fidl={
                              fidl_ref:
                              FidlDef(source=f'{FIDL_DIR}/{fidl_name}',
                                      instructions=[])
                          },
                          bindings=bindings)

    return (new_test, fidl_name)
Example #13
def main(args):
    df = pd.read_csv(args.csv_path)
    examples = [
        CatalPhoto(url=str(row['netpublish_URL']), annotation=None)
        for _, row in df.iterrows()
    ]

    # Make directories for holding photos
    for dir_name in ('wb_pos', 'wb_neg', 'unlabeled'):
        os.makedirs(os.path.join(args.output_dir, dir_name), exist_ok=True)

    session = requests.Session()
    retry = Retry(connect=3, backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)

    # Download photos
    for example in tqdm(examples):
        if example.is_labeled:
            subdir_name = 'wb_{}'.format(
                'pos' if example.has_whiteboard else 'neg')
        else:
            subdir_name = 'unlabeled'
        file_name = '{}.jpg'.format(example.record_id)
        img_path = os.path.join(args.output_dir, subdir_name, file_name)
        if os.path.exists(img_path):
            util.print_err('Already downloaded {}'.format(img_path))
            continue
        url = example.url.replace('original', 'preview')

        try:
            response = session.get(url, stream=True, timeout=10)
            with open(img_path, 'wb') as out_file:
                shutil.copyfileobj(response.raw, out_file)
            del response

        except Exception as e:
            print('Error downloading from {}: {}'.format(url, e))
            continue

        # Down-sample the image
        if args.resize_shape is not None:
            img = Image.open(img_path, 'r').convert('RGB')
            img = img.resize(args.resize_shape)
            img.save(img_path)
Example #14
def test(args):

    model, ckpt_info = ModelSaver.load_model(args.ckpt_path, args.gpu_ids)
    args.start_epoch = ckpt_info['epoch'] + 1
    model = model.to(args.device)
    model.eval()

    data_loader = get_loader(args, phase=args.phase, is_training=False)
    logger = TestLogger(args, len(data_loader.dataset))

    # Get model outputs, log to TensorBoard, write masks to disk window-by-window
    util.print_err('Writing model outputs to {}...'.format(args.results_dir))

    all_gender = []
    all_age = []
    all_tte = []
    all_is_alive = []
    all_mu = []
    all_s2 = []
    with tqdm(total=len(data_loader.dataset), unit=' windows') as progress_bar:
        for i, (src, tgt) in enumerate(data_loader):
            all_gender.extend([int(x) for x in src[:, 0]])
            all_age.extend([float(x) for x in src[:, 1]])
            all_tte.extend([float(x) for x in tgt[:, 0]])
            all_is_alive.extend([int(x) for x in tgt[:, 1]])
            with torch.no_grad():
                pred_params = model.forward(src.to(args.device))

                # import pdb
                # pdb.set_trace()
                outputs = pred_params.cpu().numpy()
                all_mu.extend([float(x) for x in outputs[:, 0]])
                all_s2.extend([float(x) for x in outputs[:, 1]])

            progress_bar.update(src.size(0))

    # print pred_params (mu, s) to file
    fd = open(args.results_dir + '/test_stats.csv', 'w')
    fd.write('gender, age, tte, is_alive, mu, s2\n')
    for gender, age, tte, is_alive, mu, s2 \
        in zip(all_gender, all_age, all_tte, all_is_alive, all_mu, all_s2):

        fd.write('%d, %f, %f, %d, %f, %f\n' %
                 (gender, age, tte, is_alive, mu, s2))
    fd.close()
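Purely as an illustration of consuming the file written above (the path and column handling are assumptions, not part of the original project), the stats could be read back with pandas:

import pandas as pd

# Hypothetical follow-up: load the dumped per-example stats for analysis.
# skipinitialspace handles the spaces after the commas in the header row.
stats = pd.read_csv('results/test_stats.csv', skipinitialspace=True)
print(stats[['mu', 's2']].describe())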
Example #15
    def fine_tuning_parameters(self, fine_tuning_boundary, fine_tuning_lr=0.0):
        """Get parameters for fine-tuning the model.
        Args:
            fine_tuning_boundary: Name of first layer after the fine-tuning layers.
            fine_tuning_lr: Learning rate to apply to fine-tuning layers (all layers before `boundary_layer`).
        Returns:
            List of dicts that can be passed to an optimizer.
        """
        def gen_params(boundary_layer_name, fine_tuning):
            """Generate parameters, if fine_tuning generate the params before boundary_layer_name.
            If unfrozen, generate the params at boundary_layer_name and beyond."""
            saw_boundary_layer = False
            for name, param in self.named_parameters():
                if name.startswith(boundary_layer_name):
                    saw_boundary_layer = True

                if saw_boundary_layer and fine_tuning:
                    return
                elif not saw_boundary_layer and not fine_tuning:
                    continue
                else:
                    yield param

        # Fine-tune the network's layers from encoder.2 onwards
        optimizer_parameters = [{
            'params':
            gen_params(fine_tuning_boundary, fine_tuning=True),
            'lr':
            fine_tuning_lr
        }, {
            'params':
            gen_params(fine_tuning_boundary, fine_tuning=False)
        }]

        # Debugging info
        util.print_err('Number of fine-tuning layers: {}'.format(
            sum(1
                for _ in gen_params(fine_tuning_boundary, fine_tuning=True))))
        util.print_err('Number of regular layers: {}'.format(
            sum(1
                for _ in gen_params(fine_tuning_boundary, fine_tuning=False))))

        return optimizer_parameters
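A minimal usage sketch, assuming `model` is an instance of the class above; the boundary name 'encoder.2' and the learning rates are placeholders rather than values taken from the original code:

import torch.optim as optim

# Two parameter groups: a small LR for the fine-tuned (pre-boundary) layers,
# the optimizer's default LR for everything from the boundary onwards.
param_groups = model.fine_tuning_parameters('encoder.2', fine_tuning_lr=1e-5)
optimizer = optim.Adam(param_groups, lr=1e-3)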
Example #16
def get_series_numbers(args):
    with open(os.path.join(args.output_dir, 'dir2type.json'), 'r') as json_fh:
        dir2type = json.load(json_fh)
    df = pd.read_csv(args.input_csv)

    for i, row in df.iterrows():
        series_dir = os.path.join(args.data_dir, str(row['Acc']))
        if os.path.exists(series_dir):
            print('Found at {}'.format(series_dir))
            for subdir in os.listdir(series_dir):
                if subdir not in dir2type:
                    while True:
                        try:
                            input_num = int(
                                input('{} (0=contrast, 1=other)?\n>>> '.format(
                                    subdir)))
                            if input_num == 0 or input_num == 1:
                                break
                        except ValueError:
                            continue
                    dir2type[
                        subdir] = 'contrast' if input_num == 0 else 'non_contrast'

                if dir2type[subdir] == 'contrast':
                    print('{} is contrast'.format(subdir))
                    dcm_dir = os.path.join(series_dir, subdir)
                    dcm_names = [
                        f for f in os.listdir(dcm_dir) if f.endswith('.dcm')
                    ]
                    dcm = util.read_dicom(os.path.join(dcm_dir, dcm_names[0]))
                    df.loc[i, 'CTA se'] = int(dcm.SeriesNumber)

    # Write CSV and dir2type mapping
    util.print_err('Dumping CSV file...')
    df.to_csv(os.path.join(args.output_dir, 'updated_annotations.csv'))
    util.print_err('Dumping JSON file...')
    with open(os.path.join(args.output_dir, 'dir2type.json'), 'w') as json_fh:
        json.dump(dir2type,
                  json_fh,
                  indent=4,
                  sort_keys=True,
                  default=util.json_encoder)
Example #17
File: test.py  Project: meajagun/papers
def test(args):

    model, ckpt_info = ModelSaver.load_model(args.ckpt_path, args.gpu_ids)
    args.start_epoch = ckpt_info['epoch'] + 1
    model = model.to(args.device)
    model.eval()

    data_loader = CIFARLoader('val', args.batch_size, args.num_workers)

    # Get model outputs, log to TensorBoard, write masks to disk window-by-window
    util.print_err('Writing model outputs to {}...'.format(args.results_dir))
    with tqdm(total=len(data_loader.dataset), unit=' examples') as progress_bar:
        for i, (inputs, info_dict) in enumerate(data_loader):
            with torch.no_grad():
                logits = model.forward(inputs.to(args.device))
                probs = F.softmax(logits)

            # TODO: Test script is incomplete. Does nothing with the outputs.

            progress_bar.update(inputs.size(0))
Example #18
def get_parameters(model, args):
    """Get parameter generators for a model.

    Args:
        model: Model to get parameters from.
        args: Command-line arguments.

    Returns:
        Dictionary of parameter generators that can be passed to a PyTorch optimizer.
    """

    def gen_params(boundary_layer_name, fine_tuning):
        """Generate parameters, if fine_tuning generate the params before boundary_layer_name.
        If unfrozen, generate the params at boundary_layer_name and beyond."""
        saw_boundary_layer = False
        for name, param in model.named_parameters():
            if name.startswith(boundary_layer_name):
                saw_boundary_layer = True

            if saw_boundary_layer and fine_tuning:
                return
            elif not saw_boundary_layer and not fine_tuning:
                continue
            else:
                yield param

    # Fine-tune the network's layers from encoder.2 onwards
    if args.pretrained or args.fine_tune:
        optimizer_parameters = [{'params': gen_params(args.fine_tuning_boundary, fine_tuning=True),
                                 'lr': args.fine_tuning_lr},
                                {'params': gen_params(args.fine_tuning_boundary, fine_tuning=False)}]
    else:
        optimizer_parameters = [{'params': gen_params(args.fine_tuning_boundary, fine_tuning=False)}]

    # Debugging info
    util.print_err('Number of fine-tuning layers: {}'
                   .format(sum(1 for _ in gen_params(args.fine_tuning_boundary, fine_tuning=True))))
    util.print_err('Number of regular layers: {}'
                   .format(sum(1 for _ in gen_params(args.fine_tuning_boundary, fine_tuning=False))))

    return optimizer_parameters
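A hedged sketch of wiring this helper to an optimizer; the Namespace fields mirror the attributes the function reads, and every value below is a placeholder:

from argparse import Namespace
import torch.optim as optim

args = Namespace(pretrained=True, fine_tune=True,
                 fine_tuning_boundary='encoder.2', fine_tuning_lr=1e-5)
# model is assumed to be any torch.nn.Module whose early layers sit before the boundary.
optimizer = optim.SGD(get_parameters(model, args), lr=1e-2, momentum=0.9)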
Example #19
def img_quality_ht(img_file_path):
    with open(img_file_path) as f:
        it = InnerToken()
        token = it.generateToken(**conf.apiInfo_2)
        req_json = {
            'image': base64.b64encode(f.read()),
            'access_token': token,
        }
        url = conf.api['img_quality_ht']
        quality = None
        res_json = None
        for i in range(1, 50):
            try:
                res_json = util.http_post(url, req_json)
                quality = res_json['result']
                log_id = res_json['log_id']
                #img_str = base64.b64decode(res_json['result']['image'])
                #nparr = np.fromstring(img_str, np.uint8)
                #img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
                break
            except Exception as e:
                util.print_err(e)
                util.print_err(res_json)
                time.sleep(1)
    return quality
Example #20
def super_resolution_ht(img_file_path, option='super_resolution'):
    with open(img_file_path) as f:
        it = InnerToken()
        token = it.generateToken(**conf.apiInfo)
        req_json = {
            'image': base64.b64encode(f.read()),
            'option': option,
            'access_token': token,
        }
        url = conf.api['super_resolution_ht']
        img = None
        res_json = None
        for i in range(1, 50):
            try:
                res_json = util.http_post(url, req_json)
                img_str = base64.b64decode(res_json['result']['image'])
                nparr = np.fromstring(img_str, np.uint8)
                img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
                break
            except Exception as e:
                util.print_err(e)
                util.print_err(res_json)
                time.sleep(1)

    return img
Example #21
def run_classifier(args):
    """Run a trained XGBoost model and get metrics."""

    test_filepath, ext = os.path.splitext(args.test_path)
    test_dir = os.path.dirname(args.test_path)

    if ext == '.svmlight':
        test_inputs, test_labels = load_svmlight_file(args.test_path)
        test_inputs = test_inputs.todense()
    elif ext == '.npy':
        test_inputs = np.load(args.test_path)
        test_labels = np.load(os.path.join(test_dir, 'labels.npy'))
    else:
        test_inputs = pd.read_csv(args.test_path)
        test_labels = pd.read_csv(os.path.join(test_dir, 'labels.csv'))

    test_data = xgb.DMatrix(test_inputs, label=test_labels, missing=0.)

    if not os.path.exists(os.path.join(args.model_dir, args.name + '.model')):
        raise IOError("Could not load model from path {}.".format(os.path.join(args.model_dir, args.name + '.model')))

    model = xgb.Booster()
    model.load_model(os.path.join(args.model_dir, args.name + '.model'))
    test_probs = model.predict(test_data)

    metrics = {'Accuracy': sk_metrics.accuracy_score(test_labels, test_probs > 0.5),
               'AUROC': sk_metrics.roc_auc_score(test_labels, test_probs),
               'AUPRC': sk_metrics.average_precision_score(test_labels, test_probs)}

    util.print_err('Performance of model at {}:'.format(os.path.join(args.model_dir, args.name + '.model')))
    for k, v in metrics.items():
        print('{} = {:.4f}'.format(k, v))

    test_probs_path = os.path.join(os.path.dirname(args.test_path), '{}_preds.npy'.format(args.name))
    util.print_err('Saving predictions to {}...'.format(test_probs_path))
    np.save(test_probs_path, test_probs)
Example #22
def create_hdf5(series_list, output_dir, resample=False, max_series=1e5):
    hdf5_fh = h5py.File(os.path.join(output_dir, 'data.hdf5'), 'a')
    for group_name in ('series', 'aneurysm_masks'):
        if group_name not in hdf5_fh:
            hdf5_fh.create_group('/{}'.format(group_name))

    assert len(series_list) < 1e5, 'Too many series for 5-digit IDs.'
    for i, s in enumerate(series_list):
        if i >= max_series:
            break
        dset_path = '/series/{:05d}'.format(i + 1)
        if dset_path in hdf5_fh:
            continue
        print('Processing series {} from study {}...'.format(
            s.series_number, s.study_name))
        pixel_arrays = []
        is_valid_series = True
        for slice_name in tqdm(s.slice_names, total=len(s), unit=' slices'):
            # Process and write slices
            dcm_path = os.path.join(s.dcm_dir, slice_name + '.dcm')
            dcm = util.read_dicom(dcm_path)
            try:
                pixel_arrays.append(util.dcm_to_raw(dcm))
            except NotImplementedError:
                print('Unsupported image format, not converting study: {}'.
                      format(s.study_name))
                is_valid_series = False
                break
        if not is_valid_series:
            continue

        volume = np.stack(pixel_arrays)

        aneurysm_mask_path = os.path.join(s.dcm_dir, 'aneurysm_mask.npy')
        if os.path.exists(aneurysm_mask_path):
            s.aneurysm_mask_path = aneurysm_mask_path
            aneurysm_mask = np.transpose(np.load(s.aneurysm_mask_path),
                                         [2, 0, 1])
        else:
            s.aneurysm_mask_path = None
            aneurysm_mask = None

        assert aneurysm_mask is None or aneurysm_mask.shape == volume.shape, \
            'Mismatched aneurysm mask and volume shapes: {} and {}'.format(aneurysm_mask.shape, volume.shape)
        if len(s) > 0 and resample:
            util.print_err('Resampling volume... Shape before: {}'.format(
                volume.shape))
            tick = time.time()
            dcm = util.read_dicom(
                os.path.join(s.dcm_dir, s.slice_names[0] + '.dcm'))
            volume, real_scale = util.resample(volume, dcm.SliceThickness,
                                               dcm.PixelSpacing, (1.5, 1., 1.))
            util.print_err('Shape after: {}. Resample took {} s.'.format(
                volume.shape,
                time.time() - tick))
            if aneurysm_mask is not None:
                util.print_err(
                    'Resampling mask... Shape before: {}, count before: {}.'.
                    format(aneurysm_mask.shape, np.sum(aneurysm_mask > 0)))
                tick = time.time()
                aneurysm_mask, mask_scale = util.resample(
                    aneurysm_mask, dcm.SliceThickness, dcm.PixelSpacing,
                    (1.5, 1., 1.))
                util.print_err(
                    'Mask shape after: {}, count after: {}. Resample took {} s.'
                    .format(aneurysm_mask.shape, np.sum(aneurysm_mask > 0),
                            time.time() - tick))
                if not aneurysm_mask.any():
                    raise RuntimeError(
                        'Mask has zero volume after resampling.')

                if s.is_aneurysm:
                    # Recompute slice numbers where the aneurysm lives
                    s.aneurysm_bounds = get_aneurysm_range(aneurysm_mask)
                    s.aneurysm_ranges = [s.aneurysm_bounds]
                    s.absolute_range = [0, aneurysm_mask.shape[0]]

        # Create one dataset for the volume (int16), one for the mask (bool)
        s.dset_path = dset_path
        hdf5_fh.create_dataset(s.dset_path,
                               data=volume,
                               dtype='i2',
                               chunks=True)

        if aneurysm_mask is not None:
            s.aneurysm_mask_path = '/aneurysm_masks/{:05d}'.format(i + 1)
            hdf5_fh.create_dataset(s.aneurysm_mask_path,
                                   data=aneurysm_mask,
                                   dtype='?',
                                   chunks=True)

    # Print summary
    util.print_err('Series: {}'.format(len(hdf5_fh['/series'])))
    util.print_err('Aneurysm Masks: {}'.format(len(
        hdf5_fh['/aneurysm_masks'])))

    # Dump pickle and JSON (updated dset_path and mask_path attributes)
    util.print_err('Dumping pickle file...')
    with open(os.path.join(output_dir, 'series_list.pkl'), 'wb') as pkl_fh:
        pickle.dump(series_list, pkl_fh)
    util.print_err('Dumping JSON file...')
    with open(os.path.join(output_dir, 'series_list.json'), 'w') as json_file:
        json.dump([dict(series) for series in series_list],
                  json_file,
                  indent=4,
                  sort_keys=True,
                  default=util.json_encoder)

    # Clean up
    hdf5_fh.close()
Example #23
def test(args):
    print ("Stage 1")
    model, ckpt_info = ModelSaver.load_model(args.ckpt_path, args.gpu_ids)
    print ("Stage 2")
    args.start_epoch = ckpt_info['epoch'] + 1
    model = model.to(args.device)
    print ("Stage 3")
    model.eval()
    print ('This should be false: {}'.format(model.training))
    print ("Stage 4")
    data_loader = CTDataLoader(args, phase=args.phase, is_training=False)
    #print('data_loader={}'.format(data_loader))
    #print('data_loader.dataset={}'.format(data_loader.dataset))
    study2slices = defaultdict(list)
    study2probs = defaultdict(list)
    study2labels = {}
    logger = TestLogger(args, len(data_loader.dataset), data_loader.dataset.pixel_dict)
    print("Stage 5")
    f = open('/projectnb/ece601/kaggle-pulmonary-embolism/meganmp/train/series_list.pkl','rb')
    data_labels = pickle.load(f)

    # Create list to manually process labels
    #with open('positive.txt') as f:
         #pos_labels = f.readlines()
    #pos_labels = [x.strip() for x in pos_labels]
    ispos = [x.is_positive for x in data_labels]
    isposidx = [x.study_num for x in data_labels]
    label_dict = {}
    for i in range(len(ispos)):
        label_dict[isposidx[i]] = ispos[i]

    for key in label_dict.keys():
        print('label_dict={}\t{}'.format(key, label_dict[key]))
    

    # Get model outputs, log to TensorBoard, write masks to disk window-by-window
    util.print_err('Writing model outputs to {}...'.format(args.results_dir))
    with tqdm(total=len(data_loader.dataset), unit=' windows') as progress_bar:
        for i, (inputs, targets_dict) in enumerate(data_loader):
            with torch.no_grad():
                cls_logits = model.forward(inputs.to(args.device))
                cls_probs = torch.sigmoid(cls_logits)

            if args.visualize_all:
                logger.visualize(inputs, cls_logits, targets_dict=None, phase=args.phase, unique_id=i)

            max_probs = cls_probs.to('cpu').numpy()
            for study_num, slice_idx, prob in \
                    zip(targets_dict['study_num'], targets_dict['slice_idx'], list(max_probs)):
                #print('targets_dict[studynum]={}'.format(targets_dict['study_num']))
                #print('targets_dict[sliceidx]={}'.format(targets_dict['slice_idx']))
                # Convert to standard python data types
                study_num = study_num #.item()
                #study_num = int(study_num)
                slice_idx = int(slice_idx)

                # Save series num for aggregation
                study2slices[study_num].append(slice_idx)
                study2probs[study_num].append(prob.item())


                series = data_loader.get_series(study_num)
                if study_num not in study2labels:
                    print('study_num={}'.format(study_num))
                    print('series.is_positive={}'.format(label_dict[study_num]))
                    study2labels[study_num] = label_dict[study_num]
                    #if study_num in pos_labels:
                        #print('DEBUG -------=1?-------------------')
                        #print('POS LABEL')
                        #print('study_num={}'.format(study_num))
                        #study2labels[study_num] = 1
                    #else:
                        #print('Not in study2labels. series = {}'.format(study_num))
                        #print('series.is_positive={}'.format(series.is_positive))
                        #study2labels[study_num] = int(series.is_positive)
                        #print('study2labels: {}'.format(study2labels[study_num]))

            progress_bar.update(inputs.size(0))

    print('study2labels={}'.format(study2labels))

    # Combine masks
    util.print_err('Combining masks...')
    max_probs = []
    labels = []
    predictions = {}
    print("Get max prob")
    for study_num in tqdm(study2slices):

        # Sort by slice index and get max probability
        slice_list, prob_list = (list(t) for t in zip(*sorted(zip(study2slices[study_num], study2probs[study_num]),
                                                              key=lambda slice_and_prob: slice_and_prob[0])))
        study2slices[study_num] = slice_list
        study2probs[study_num] = prob_list
        max_prob = max(prob_list)
        print('study={}\tmax_prob={}'.format(study_num, max_prob))
        max_probs.append(max_prob)
        label = study2labels[study_num]
        labels.append(label)
        predictions[study_num] = {'label':label, 'pred':max_prob}

    #Save predictions to file, indexed by study number
    print("Saving predictions to pickle files")
    with open('{}/preds.pickle'.format(args.results_dir),"wb") as fp:
        pickle.dump(predictions,fp)

    results_series = [k for k,_ in predictions.items()]
    results_pred = [v['pred'] for _,v in predictions.items()]
    results_label = [v['label'] for _,v in predictions.items()]
    print('roc_auc_score={}'.format(roc_auc_score(results_label, results_pred)))

    # Create dataframe summary
    TRAIN_CSV = '/projectnb/ece601/kaggle-pulmonary-embolism/rsna-str-pulmonary-embolism-detection/train.csv'
    train_df = pd.read_csv(TRAIN_CSV)
    train_df = train_df[['SeriesInstanceUID', 'negative_exam_for_pe']]
    train_df = train_df.groupby('SeriesInstanceUID').aggregate(list)
    train_df['pe_label'] = train_df['negative_exam_for_pe'].apply(lambda x: 0 if 1 in x else 1)

    results_dict = {
        'series': results_series,
        'pred': results_pred
    }
    results_df = pd.DataFrame.from_dict(results_dict)

    results_df = results_df.set_index('series')
    results_df = results_df.join(train_df, how='left').reset_index().rename({'index': 'series'})
    print('roc_auc_score={}'.format(roc_auc_score(results_df['pe_label'], results_df['pred'])))
    
    # Calculate confusion matrix
    results_df['interpretation'] = results_df['pred'].apply(lambda x: 0 if x < 0.5 else 1)
    print(results_df.head(10))
    tn, fp, fn, tp = confusion_matrix(results_df['pe_label'], results_df['interpretation']).ravel()
    print('confusion_matrix: [{} {} {} {}]'.format(tp, fp, fn, tn))
Example #24
def test(args):
    print("Stage 1")
    model, ckpt_info = ModelSaver.load_model(args.ckpt_path, args.gpu_ids)
    print("Stage 2")
    args.start_epoch = ckpt_info['epoch'] + 1
    model = model.to(args.device)
    print("Stage 3")
    model.eval()
    print("Stage 4")
    data_loader = CTDataLoader(args, phase=args.phase, is_training=False)
    print(data_loader.dataset.ctpe_list)
    study2slices = defaultdict(list)
    study2probs = defaultdict(list)
    study2labels = {}
    logger = TestLogger(args, len(data_loader.dataset),
                        data_loader.dataset.pixel_dict)

    minimum = []
    means = []
    maximum = []
    data = []

    # Get model outputs, log to TensorBoard, write masks to disk window-by-window
    util.print_err('Writing model outputs to {}...'.format(args.results_dir))
    with tqdm(total=len(data_loader.dataset), unit=' windows') as progress_bar:
        for i, (inputs, targets_dict) in enumerate(data_loader):

            with torch.no_grad():
                cls_logits = model.forward(inputs.to(args.device))
                cls_probs = F.sigmoid(cls_logits)

            if args.visualize_all:
                logger.visualize(inputs,
                                 cls_logits,
                                 targets_dict=None,
                                 phase=args.phase,
                                 unique_id=i)

            max_probs = cls_probs.to('cpu').numpy()
            for study_num, slice_idx, prob in \
                    zip(targets_dict['study_num'], targets_dict['slice_idx'], list(max_probs)):
                # Convert to standard python data types
                study_num = int(study_num)
                slice_idx = int(slice_idx)

                # Save series num for aggregation
                study2slices[study_num].append(slice_idx)
                study2probs[study_num].append(prob.item())

                series = data_loader.get_series(study_num)
                if study_num not in study2labels:
                    study2labels[study_num] = int(series.is_positive)

            progress_bar.update(inputs.size(0))

    # Combine masks
    util.print_err('Combining masks...')
    max_probs = []
    labels = []
    study_nums = []
    predictions = {}
    print("Get max prob")
    for study_num in tqdm(study2slices):

        # Sort by slice index and get max probability
        slice_list, prob_list = (list(t) for t in zip(
            *sorted(zip(study2slices[study_num], study2probs[study_num]),
                    key=lambda slice_and_prob: slice_and_prob[0])))
        study2slices[study_num] = slice_list
        study2probs[study_num] = prob_list
        max_prob = max(prob_list)
        max_probs.append(max_prob)
        label = study2labels[study_num]
        labels.append(label)
        study_nums.append(study_num)
        predictions[study_num] = {'label': label, 'pred': max_prob}

    #Save predictions to file, indexed by study number
    print("Saving predictions to pickle files")
    with open('{}/preds.pickle'.format(args.results_dir), "wb") as fp:
        pickle.dump(predictions, fp)

    # Compute AUROC and AUPRC using max aggregation, write to files
    max_probs, labels = np.array(max_probs), np.array(labels)

    fpr, tpr, threshold = roc_curve(labels, max_probs)
    i = np.arange(len(tpr))
    roc = pd.DataFrame({
        'tf': pd.Series(tpr - (1 - fpr), index=i),
        'threshold': pd.Series(threshold, index=i)
    })
    roc_t = roc.iloc[(roc.tf - 0).abs().argsort()[:1]]

    threshold = 0.5
    pred = [1 if p > threshold else 0 for p in max_probs]

    tn, fp, fn, tp = confusion_matrix(labels, pred).ravel()

    print("\nTrue Positive Examples\n" + "-" * 80)
    for i, study_num in enumerate(study_nums):
        if labels[i] == 1 and pred[i] == 1: print(study_num)
    print("\nTrue Negative Examples\n" + "-" * 80)
    for i, study_num in enumerate(study_nums):
        if labels[i] == 0 and pred[i] == 0: print(study_num)
    print("\nFalse Negative Examples\n" + "-" * 80)
    for i, study_num in enumerate(study_nums):
        if labels[i] == 1 and pred[i] == 0: print(study_num)
    print("\nFalse Positive Examples\n" + "-" * 80)
    for i, study_num in enumerate(study_nums):
        if labels[i] == 0 and pred[i] == 1: print(study_num)

    print("Total number of data :", len(labels))
    print("Total number of positives :", len([l for l in labels if l == 1]))
    print("Total number of negatives :", len([l for l in labels if l == 0]))
    print("# True Negative : ", tn)
    print("# True Positive : ", tp)
    print("# False Negative : ", fn)
    print("# False Positive : ", fp)

    metrics = {
        args.phase + '_' + 'AUPRC':
        sk_metrics.average_precision_score(labels, max_probs),
        args.phase + '_' + 'AUROC':
        sk_metrics.roc_auc_score(labels, max_probs),
    }
    for k, v in metrics.items():
        print('{}: {:.5f}\n'.format(k, v))
    print("Saving metrics to file")
    with open(os.path.join(args.results_dir, 'metrics.txt'),
              'w') as metrics_fh:
        for k, v in metrics.items():
            metrics_fh.write('{}: {:.5f}\n'.format(k, v))

    curves = {
        args.phase + '_' + 'PRC':
        sk_metrics.precision_recall_curve(labels, max_probs),
        args.phase + '_' + 'ROC':
        sk_metrics.roc_curve(labels, max_probs)
    }

    roc = sk_metrics.roc_curve(labels, max_probs)
    with open("intermountain_roc.pkl", 'wb') as f:
        pickle.dump(roc, f)
    for name, curve in curves.items():
        curve_np = util.get_plot(name, curve)
        curve_img = Image.fromarray(curve_np)
        curve_img.save(os.path.join(args.results_dir, '{}.png'.format(name)))
Example #25
def bml_map(line, download_dir_name, merged_dir_name):
    #try:
        fs = line.strip().split('\t')
        pics = fs[0:3]
        title = fs[4]
        brand = fs[5]
        audio_url = fs[7]
        ideaid = fs[-1]
        m2 = hashlib.md5()
        output_dir = download_dir_name
        url_names = []
        pic_file_names = []
        pic_local_paths = []
        q_all = []
        ready_pics = 0
        for index,p in enumerate(pics):
            m2.update(p + 'sunfuhao')
            file_name = m2.hexdigest() 
            fn_suf = file_name+'.jpg' 
            output_path = os.path.join(output_dir, fn_suf)
            url_names.append(p)
            pic_file_names.append(fn_suf)
            pic_local_paths.append(output_path)

            #download
            if os.path.exists(output_path):
                util.print_err("dupulicated image %s" % (file_name))
                ready_pics+= 1
                continue
            img_data = None
            use_proxy = False
            for i in range(50):     
            #while True:
                try:
                    img_data = util.http_get(p, use_proxy)
                    util.print_err("%s downloaded succeed" % p)
                    break
                except Exception as e:
                    util.print_err("%s %s" % (e,p))
                    use_proxy = not use_proxy
                    util.print_err("use proxy")
                    time.sleep(1)
                    continue
            if img_data is not None and len(img_data) > 1000:
                q0 = None
                for i in range(30):
                    try:
                        q0 = cvtools.req_clarity_bvc(img_data)
                        if q0 < 0.3:
                            return None
                        q_all.append(q0)
                        break
                    except Exception as e:
                        util.print_err("fail_clarity")
                        time.sleep(2)
                        continue
                with open(output_path, 'w') as fn:
                    fn.write(img_data)
                img1 = cv2.imread(output_path)
                img1 = cvtools.img_resize(img1, (370, 245))
                cv2.imwrite(output_path, img1)
                ready_pics += 1
            else:
                util.print_err("%s download failed!!!" % p)

        if ready_pics != 3:
            util.print_err("has not enough images %s" % (len(pic_file_names)))
            return

        img_name1,img_name2,img_name3 = pic_file_names[0],pic_file_names[1],pic_file_names[2]
        fimg1, fimg2, fimg3 = pic_local_paths
        img1, img2, img3 = cv2.imread(fimg1), cv2.imread(fimg2), cv2.imread(fimg3)
        res_dir_chaofen = merged_dir_name
        #make mapper_quality
        #for index,files in enumerate(pic_local_paths):
        #    name = "q"+str(index)
        #    for i in range(1, 500):
        #    #while True:
        #        try:
        #            name = cvtools.img_quality_ht(files)
        #            q_all.append(name)
        #            #if name < 0.55:
        #                #return None
        #            break
        #        except Exception as e:
        #            util.print_err("%s %s" % (e,files))
        #            time.sleep(10)

        #q0 = cvtools.img_quality_ht(fimg1)
        #q1 = cvtools.img_quality_ht(fimg2)
        #q2 = cvtools.img_quality_ht(fimg3)
        #print q0,q1,q2
        #if (q0< 0.55 or q1<0.55 or q2<0.55) :
        #    return None;
        fn_path1 = os.path.join(res_dir_chaofen, img_name1)
        fn_path2 = os.path.join(res_dir_chaofen, img_name2)
        fn_path3 = os.path.join(res_dir_chaofen, img_name3)
        
        cv2.imwrite(fn_path1, img1)
        cv2.imwrite(fn_path2, img2)
        cv2.imwrite(fn_path3, img3)
        c0 = cvtools.super_resolution(fn_path1, svr_url, is_local=False)
        h, w, d = c0.shape[:3]
        target_sz = (h, int(867), d)
        c2_p = cvtools.img_padding(c0, target_sz, dir='h', method='gblur')
        cv2.imwrite(fn_path1, c2_p)

        with open(fn_path1) as f:
             base1 = base64.b64encode(f.read())
        c0 = cvtools.super_resolution(fn_path2, svr_url, is_local=False)
        h, w, d = c0.shape[:3]
        target_sz = (h, int(867), d)
        c2_p = cvtools.img_padding(c0, target_sz, dir='h', method='gblur')
        cv2.imwrite(fn_path2, c2_p)

        with open(fn_path2) as f:
             base2 = base64.b64encode(f.read())
        c0 = cvtools.super_resolution(fn_path3, svr_url, is_local=False)
        h, w, d = c0.shape[:3]
        target_sz = (h, int(867), d)
        c2_p = cvtools.img_padding(c0, target_sz, dir='h', method='gblur')
        cv2.imwrite(fn_path3, c2_p)
        with open(fn_path3) as f:
             base3 = base64.b64encode(f.read())
	#fn = open("./log_res", 'a')
	#templtelist = ['feedinspireing2','Digital Zoom-3','Color Swipe-3','ElegantSlideshow-3']
	templtelist = ['99','98','97','96']
	templte = random.choice(templtelist)
	templte = "99" 
	basestr = base1+"\t"+base2+"\t"+base3+"\t"+"end after three pics"+"\n"
	prjson = '{"video_key":"%s","company":"%s","audio":["%s"],"pic_and_desc":[{"pic_binary":"%s","desc":"%s"},{"pic_binary":"%s","desc":"%s"},{"pic_binary":"%s","desc":"%s"}],"trade":[{"trade_id_1st":"%s","trade_name_1st":"feed"}],"ad_info":{"userid":"%s","planid":"123","unitid":"123","winfoid":"123"},"other_info":{"lp_url":""}}' %(ideaid,title,audio_url,base1,title,base2,brand,base3,title,templte,ideaid)
	#with open("./data/log_res",'a') as fn:
	#    fn.write(prjson)
	#fn.close()
	#if len(q_all) == 3: 
	#    print (prjson+"\t"+str(q_all[0])+"\t"+str(q_all[1])+"\t"+str(q_all[2]))
	#else:
	#    print (prjson+"\t"+"not_3pic")
	print prjson
Example #26
             base3 = base64.b64encode(f.read())
	#fn = open("./log_res", 'a')
	#templtelist = ['feedinspireing2','Digital Zoom-3','Color Swipe-3','ElegantSlideshow-3']
	templtelist = ['99','98','97','96']
	templte = random.choice(templtelist)
	templte = "99" 
	basestr = base1+"\t"+base2+"\t"+base3+"\t"+"end after three pics"+"\n"
	prjson = '{"video_key":"%s","company":"%s","audio":["%s"],"pic_and_desc":[{"pic_binary":"%s","desc":"%s"},{"pic_binary":"%s","desc":"%s"},{"pic_binary":"%s","desc":"%s"}],"trade":[{"trade_id_1st":"%s","trade_name_1st":"feed"}],"ad_info":{"userid":"%s","planid":"123","unitid":"123","winfoid":"123"},"other_info":{"lp_url":""}}' %(ideaid,title,audio_url,base1,title,base2,brand,base3,title,templte,ideaid)
	#with open("./data/log_res",'a') as fn:
	#    fn.write(prjson)
	#fn.close()
	#if len(q_all) == 3: 
	#    print (prjson+"\t"+str(q_all[0])+"\t"+str(q_all[1])+"\t"+str(q_all[2]))
	#else:
	#    print (prjson+"\t"+"not_3pic")
	print prjson
#    except Exception as e:
#        util.print_err("%s" % e)
#        util.print_err(line)
#        return

if __name__ == "__main__":
    download_dir_name = sys.argv[1]
    merged_dir_name = sys.argv[2]
    util.print_err("%s %s" % (download_dir_name, merged_dir_name))
    global svr_url
    svr_url = [cvtools.get_server_addr()]
    for line in sys.stdin:
        bml_map(line, download_dir_name, merged_dir_name)

Example #27
def test(args):
    print ("Stage 1")
    model, ckpt_info = ModelSaver.load_model(args.ckpt_path, args.gpu_ids)
    print ("Stage 2")
    args.start_epoch = ckpt_info['epoch'] + 1
    model = model.to(args.device)
    print ("Stage 3")
    model.eval()
    print ("Stage 4")
    data_loader = CTDataLoader(args, phase=args.phase, is_training=False)
    study2slices = defaultdict(list)
    study2probs = defaultdict(list)
    study2labels = {}
    logger = TestLogger(args, len(data_loader.dataset), data_loader.dataset.pixel_dict)

    means = []

    # Get model outputs, log to TensorBoard, write masks to disk window-by-window
    util.print_err('Writing model outputs to {}...'.format(args.results_dir))
    with tqdm(total=len(data_loader.dataset), unit=' windows') as progress_bar:
        for i, (inputs, targets_dict) in enumerate(data_loader):
            means.append(inputs.mean().data[0])
            with torch.no_grad():
                cls_logits = model.forward(inputs.to(args.device))
                cls_probs = F.sigmoid(cls_logits)

            if args.visualize_all:
                logger.visualize(inputs, cls_logits, targets_dict=None, phase=args.phase, unique_id=i)

            max_probs = cls_probs.to('cpu').numpy()
            for study_num, slice_idx, prob in \
                    zip(targets_dict['study_num'], targets_dict['slice_idx'], list(max_probs)):
                # Convert to standard python data types
                study_num = int(study_num)
                slice_idx = int(slice_idx)

                # Save series num for aggregation
                study2slices[study_num].append(slice_idx)
                study2probs[study_num].append(prob.item())

                series = data_loader.get_series(study_num)
                if study_num not in study2labels:
                    study2labels[study_num] = int(series.is_positive)

            progress_bar.update(inputs.size(0))
    
    # Combine masks
    util.print_err('Combining masks...')
    max_probs = []
    labels = []
    predictions = {}
    print("Get max probability")
    for study_num in tqdm(study2slices):

        # Sort by slice index and get max probability
        slice_list, prob_list = (list(t) for t in zip(*sorted(zip(study2slices[study_num], study2probs[study_num]),
                                                              key=lambda slice_and_prob: slice_and_prob[0])))
        study2slices[study_num] = slice_list
        study2probs[study_num] = prob_list
        max_prob = max(prob_list)
        max_probs.append(max_prob)
        label = study2labels[study_num]
        labels.append(label)
        predictions[study_num] = {'label':label, 'pred':max_prob}

    #Save predictions to file, indexed by study number
    print("Save to pickle")
    with open('{}/preds.pickle'.format(args.results_dir),"wb") as fp:
        pickle.dump(predictions,fp)
        
    # Write features for XGBoost
    save_for_xgb(args.results_dir, study2probs, study2labels)
    # Write the slice indices used for the features
    print("Write slice indices")
    with open(os.path.join(args.results_dir, 'xgb', 'series2slices.json'), 'w') as json_fh:
        json.dump(study2slices, json_fh, sort_keys=True, indent=4)

    # Compute AUROC and AUPRC using max aggregation, write to files
    max_probs, labels = np.array(max_probs), np.array(labels)
    metrics = {
        args.phase + '_' + 'AUPRC': sk_metrics.average_precision_score(labels, max_probs),
        args.phase + '_' + 'AUROC': sk_metrics.roc_auc_score(labels, max_probs),
    }
    print("Write metrics")
    with open(os.path.join(args.results_dir, 'metrics.txt'), 'w') as metrics_fh:
        for k, v in metrics.items():
            metrics_fh.write('{}: {:.5f}\n'.format(k, v))

    curves = {
        args.phase + '_' + 'PRC': sk_metrics.precision_recall_curve(labels, max_probs),
        args.phase + '_' + 'ROC': sk_metrics.roc_curve(labels, max_probs)
    }
    for name, curve in curves.items():
        curve_np = util.get_plot(name, curve)
        curve_img = Image.fromarray(curve_np)
        curve_img.save(os.path.join(args.results_dir, '{}.png'.format(name)))
Example #28
 def print_result(self, results):
     deflection = results[-1] / (self.load_case.wing.wing_box.end_y *
                                 2) * 100
     print("Maximum deflection: {0:.2f} [%]".format(deflection))
     if deflection > self.load_case.limit_deflection:
         util.print_err("Wing box failed: deflection exceeded limits")
Example #29
def run_models(args, csv_cols, models, task_sequence):
    """Run models and save predicted probabilities to disk.

    Args:
        args: Command-line arguments.
        csv_cols: List of column headers for the CSV files (should be 'Path' + all pathologies).
        models: List of (ckpt_path, is_3class) tuples to use for the models.
        task_sequence: List of tasks to predict for each model.
    """
    data_args = args.data_args
    logger_args = args.logger_args
    model_args = args.model_args
    transform_args = args.transform_args

    # Get eval loader
    data_loader = get_loader(data_args,
                             transform_args,
                             data_args.split,
                             TASK_SEQUENCES[data_args.task_sequence],
                             su_frac=1,
                             nih_frac=0,
                             batch_size=args.batch_size,
                             is_training=False,
                             shuffle=False,
                             study_level=True,
                             return_info_dict=True)

    num_models_finished = 0
    for ckpt_path, is_3class in models:

        output_dir = os.path.join(logger_args.results_dir, data_args.split)
        output_path = os.path.join(
            output_dir, '{}.csv'.format(get_checkpoint_identifier(ckpt_path)))
        if os.path.exists(output_path):
            print(
                f"Single model probabilities already written to {output_path}")
            continue
        else:
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)
            # Make an empty file so parallel processes run different models
            # open(output_path, 'w').close()

        util.print_err('Running model {} / {} from path {}'.format(
            num_models_finished + 1, len(models), ckpt_path))

        # Get model
        model_args.model_uncertainty = is_3class
        model, ckpt_info = ModelSaver.load_model(ckpt_path, args.gpu_ids,
                                                 model_args, data_args)
        model = model.to(args.device)
        model.eval()

        # Get task sequence predicted by the model
        model_task_sequence = model.module.task_sequence
        if model_task_sequence != task_sequence:
            error_msgs = [
                'Mismatched task sequences:',
                'Checkpoint: {}'.format(model_task_sequence),
                'Args: {}'.format(TASK_SEQUENCES[data_args.task_sequence])
            ]
            raise ValueError('\n  '.join(error_msgs))

        # Sample from the data loader and record model outputs
        csv_rows = []
        num_examples = len(data_loader.dataset)
        with tqdm(total=num_examples,
                  unit=' ' + data_args.split + ' ' +
                  data_args.dataset_name) as progress_bar:
            for inputs, targets, info_dict, mask in data_loader:

                with torch.no_grad():

                    # For Stanford, evaluate on studies
                    if data_args.dataset_name == 'stanford':
                        # Fuse batch size `b` and study length `s`
                        b, s, c, h, w = inputs.size()
                        inputs = inputs.view(-1, c, h, w)

                        # Predict
                        logits = model.forward(inputs.to(args.device))
                        logits = logits.view(b, s, -1)

                        # Mask padding to negative infinity
                        ignore_where = (mask == 0).unsqueeze(-1).repeat(
                            1, 1, logits.size(-1)).to(args.device)
                        logits = torch.where(ignore_where,
                                             torch.full_like(logits, NEG_INF),
                                             logits)
                        logits, _ = torch.max(logits, 1)
                    elif data_args.dataset_name == 'nih':
                        logits = model.forward(inputs.to(args.device))
                    else:
                        raise ValueError('Invalid dataset name: {}'.format(
                            data_args.dataset_name))

                    # Save study path and probabilities for each example
                    if is_3class:
                        batch_probs = util.uncertain_logits_to_probs(logits)
                    else:
                        batch_probs = torch.sigmoid(logits)
                    study_paths = info_dict['paths']
                    for probs, path in zip(batch_probs, study_paths):
                        csv_row = {
                            COL_PATH: path,
                            'DataSplit': data_args.split
                        }
                        csv_row.update({
                            task: prob.item()
                            for task, prob in zip(model_task_sequence, probs)
                        })
                        csv_rows.append(csv_row)

                progress_bar.update(targets.size(0))

        # Write CSV file to disk
        df = pd.DataFrame(csv_rows)
        print('Saving single-model probabilities to: {}'.format(output_path))
        df[csv_cols].to_csv(output_path, index=False)

        num_models_finished += 1
Example #30
 def print_result(self, results):
     twist = results[-1] * 180 / sp.pi
     print("Maximum twist: {0:.2f} [deg]".format(twist))
     if twist > self.load_case.limit_twist:
         util.print_err("Wing box failed: twist exceeded limits")