Example #1
    def run_ocr(self,
                lang: str,
                time_start: str,
                time_end: str,
                conf_threshold: int,
                tesseract_config: str,
                roi=[[0, 1], [0, 1]],
                debug: bool = False,
                num_jobs=None) -> None:
        self.lang = lang
        self.tesseract_config = tesseract_config
        self.roi = roi
        self.debug = debug
        self.num_jobs = num_jobs

        ocr_start = utils.get_frame_index(time_start,
                                          self.fps) if time_start else 0
        ocr_end = utils.get_frame_index(
            time_end, self.fps) if time_end else self.num_frames

        if ocr_end < ocr_start:
            raise ValueError('time_start is later than time_end')
        num_ocr_frames = ocr_end - ocr_start

        # get frames from ocr_start to ocr_end
        with Capture(self.path) as v:
            v.set(cv2.CAP_PROP_POS_FRAMES, ocr_start)
            frames = (v.read()[1] for _ in range(num_ocr_frames))

            # perform OCR on the frames in parallel
            if num_jobs is not None:
                it_ocr = p_tqdm.p_imap(self._image_to_data,
                                       range(num_ocr_frames),
                                       frames,
                                       num_cpus=num_jobs)
            else:
                it_ocr = p_tqdm.p_imap(self._image_to_data,
                                       range(num_ocr_frames), frames)

            self.pred_frames = [
                PredictedFrame(i + ocr_start, data, conf_threshold)
                for i, data in enumerate(it_ocr)
            ]
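The pattern worth noting here: p_imap consumes the two iterables in lockstep, one of them a lazy generator, with num_cpus bounding the worker count. A minimal, self-contained sketch of the same idea (the worker and the fake frames are stand-ins, not part of the original class):

import p_tqdm

def ocr_one(index, frame):
    # stand-in for the real _image_to_data OCR call
    return f'frame {index}: {frame}'

if __name__ == '__main__':
    # lazy source, like the v.read()[1] generator above; range()
    # supplies the length that drives the progress bar
    frames = (f'pixels-{i}' for i in range(4))
    for line in p_tqdm.p_imap(ocr_one, range(4), frames, num_cpus=2):
        print(line)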
Example #2
import os

from p_tqdm import p_imap

# download_img (URL -> bytes) is assumed to be defined elsewhere

def store_img(img_list, f_format, path):
    if not os.path.exists(path):
        os.mkdir(path)
    # download and save all image files
    try:
        img = p_imap(download_img, img_list)
        for index, item in enumerate(img):
            content = f'{path}/{index}.{f_format}'
            if not os.path.exists(content):
                with open(content, 'wb') as file:
                    file.write(item)
    except Exception as e:
        print(f'Image download failed: {e}')
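A quick smoke test for store_img, assuming download_img maps a URL to raw bytes (the URLs below are placeholders):

if __name__ == '__main__':
    urls = [
        'https://example.com/a.jpg',
        'https://example.com/b.jpg',
    ]
    store_img(urls, 'jpg', './downloads')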
Example #3
from itertools import combinations

import numpy as np
from scipy import sparse
from p_tqdm import p_imap

def build_BP_mat(api_sets, num_apis):
    # all unordered API pairs within one list, as an (n, 2) array
    comb_func = lambda api_list: np.array(list(combinations(api_list, r=2)))
    row = []
    col = []
    for combos in p_imap(comb_func, api_sets):
        row.extend(combos[:,0])
        col.extend(combos[:,1])
    mat = sparse.csr_matrix(([True]*len(row), (row, col)), shape=(num_apis, num_apis), dtype=bool)
    del row, col
    mat.setdiag(True)
    mat += mat.T
    return mat
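A toy invocation with made-up API id sets; each inner list holds the APIs seen together, and the returned sparse boolean matrix marks every co-occurring pair plus the diagonal:

if __name__ == '__main__':
    api_sets = [[0, 1, 2], [2, 3]]
    bp = build_BP_mat(api_sets, num_apis=4)
    print(bp.toarray())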
Example #4
    def evaluate_parallel(self, func, *args, nprocs=None, **kwargs):
        """Evaluate model locally in parallel.

        All detected processors will be used if `nprocs` is not specified.

        Parameters
        ----------
        func : function
            Model, or function that wraps a model, to be run in parallel.
            The provided function must accept a numpy array of inputs as
            its first parameter and must return a numpy array of results.

        *args : list
            Additional positional arguments to be passed to `func`.

        nprocs : int, optional
            Number of processors to use. Uses all available if not specified.

        **kwargs : dict
            Additional keyword arguments passed to `func`.

        Returns
        -------
        self : ProblemSpec object
        """
        warnings.warn("This is an experimental feature and may not work.")

        if self._samples is None:
            raise RuntimeError("Sampling not yet conducted")

        if nprocs is None:
            nprocs = cpu_count()

        # Create wrapped partial function to allow passing of additional args
        tmp_f = self._wrap_func(func, *args, **kwargs)

        # Split into even chunks
        chunks = np.array_split(self._samples, int(nprocs), axis=0)

        if ptqdm_available:
            # Display progress bar if available
            res = p_imap(tmp_f, chunks, num_cpus=nprocs)
        else:
            with Pool(nprocs) as pool:
                res = list(pool.imap(tmp_f, chunks))

        self._results = self._collect_results(res)

        return self
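A sketch of the contract the docstring describes. toy_model is illustrative; the ProblemSpec setup is only hinted at in comments because it happens elsewhere in the library:

import numpy as np

def toy_model(X: np.ndarray) -> np.ndarray:
    # one row of inputs per sample in, one result per sample out
    return np.sum(X ** 2, axis=1)

# sp = ProblemSpec(...)                      # defined and sampled elsewhere
# sp.evaluate_parallel(toy_model, nprocs=4)  # returns sp for chaining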
Example #5
import time
from functools import partial

from p_tqdm import p_imap

# test_list and mp_vote_img_weight are assumed to be defined elsewhere

def mp_mugenerate_voted_list(target_dir, weight=None):
    # timing
    start = time.time()
    file_num = len(test_list)
    count = 0

    iterator = p_imap(partial(mp_vote_img_weight,
                              target_dir=target_dir,
                              weight=weight),
                      test_list,
                      position=0,
                      leave=True,
                      ncols=100,
                      dynamic_ncols=False)

    # drain the lazy iterator so every job actually runs; any output is
    # produced as a side effect inside mp_vote_img_weight
    for img_name in iterator:
        pass

    return
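Note that position, leave, ncols and dynamic_ncols are not p_imap options: p_tqdm forwards unrecognized keyword arguments to the underlying tqdm progress bar. A standalone illustration with made-up work:

from functools import partial
from p_tqdm import p_imap

def scale(x, factor):
    return x * factor

if __name__ == '__main__':
    # desc and ncols are tqdm options, passed through by p_imap
    for y in p_imap(partial(scale, factor=10), range(5),
                    desc='scaling', ncols=80):
        print(y)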
Example #6
from p_tqdm import p_imap
from rouge_score import rouge_scorer
from tqdm import tqdm

def compute(predictions,
            references,
            rouge_types=None,
            use_aggregator=True,
            use_parallel=False,
            show_progress=False):
    if rouge_types is None:
        rouge_types = ['rouge1']

    scorer = rouge_scorer.RougeScorer(rouge_types=rouge_types,
                                      use_stemmer=False)
    aggregator = (rouge_scorer.scoring.BootstrapAggregator()
                  if use_aggregator else None)
    if not use_aggregator and use_parallel:
        scores = list(
            p_imap(lambda x: scorer.score(x[0], x[1]),
                   list(zip(references, predictions))))
    else:
        scores = []
        indices = range(len(references))
        if show_progress:
            indices = tqdm(indices)
        for i in indices:
            score = scorer.score(references[i], predictions[i])
            if use_aggregator:
                aggregator.add_scores(score)
            else:
                scores.append(score)

    if use_aggregator:
        result = aggregator.aggregate()
    else:
        result = {}
        for key in scores[0]:
            result[key] = list(score[key] for score in scores)

    return result
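A minimal call with toy data (the strings are made up; exact scores depend on the inputs):

if __name__ == '__main__':
    preds = ['the cat sat on the mat']
    refs = ['a cat was sitting on the mat']
    print(compute(preds, refs, rouge_types=['rouge1', 'rougeL']))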
Example #7
    def extract_features(self):
        print('[INFO] Extract features from all audio files')

        all_lists = self.meta_data.file_list()
        fnames = list(set(all_lists[0]))

        # prepare extractor
        feat_type = self.config['features']['type']
        extractor = F.prepare_extractor(feats=feat_type, params=self.config['features'])
        writer = io.HDFWriter(file_name=self.feat_file)

        iterator = p_imap(lambda fn: self._extraction_job(fn, extractor, self.meta_data.data_dir), fnames)

        for result in iterator:
            for fn, feat in result.items():
                fid = path.basename(fn)
                writer.append(file_id=fid, feat=feat)
        writer.close()
        del writer
        print('Files processed: %d' % len(fnames))
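The lambda handed to p_imap above would break a plain multiprocessing.Pool, whose default pickler cannot serialize lambdas; it works here because p_tqdm is built on pathos, which serializes with dill. A minimal demonstration:

from p_tqdm import p_imap

if __name__ == '__main__':
    # a lambda is fine here thanks to dill-based serialization
    print(list(p_imap(lambda x: x ** 2, range(8), num_cpus=2)))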
Example #8
            raw_target = ' '.join(
                paragraph_toks_from_html(resolve_course(target_note_str)))
            if raw_target in seen_targets:
                print('Duplicate hospital course.  MRN={}. Account={}.'.format(
                    mrn, account))
                continue
            seen_targets.add(raw_target)
            examples.append(
                (mrn, account, len(source_note_str), len(target_note_str),
                 source_note_str, target_note_str))

    if len(examples) > 0:
        df = pd.DataFrame(examples,
                          columns=[
                              'mrn', 'account', 'source_len', 'target_len',
                              'source_str', 'target_str'
                          ])
        df.to_csv(examples_fn, index=False)
        return 1
    return 0


if __name__ == '__main__':
    mrn_status_df, mrn_valid_idxs, mrns = get_mrn_status_df('valid_account')
    n = len(mrns)
    print('Processing {} mrns'.format(n))
    statuses = list(p_imap(generate_examples, mrns, num_cpus=0.8))
    print('{} out of {} are valid'.format(sum(statuses), len(statuses)))
    update_mrn_status_df(mrn_status_df, list(statuses), mrn_valid_idxs,
                         'valid_example')
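num_cpus=0.8 above is a p_tqdm convenience: a float is interpreted as a fraction of the available cores, rounded to an integer worker count. A standalone check:

from multiprocessing import cpu_count
from p_tqdm import p_imap

def incr(x):
    return x + 1

if __name__ == '__main__':
    # 0.5 resolves to roughly half of cpu_count() workers
    print(cpu_count(), list(p_imap(incr, range(4), num_cpus=0.5)))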
Example #9
import numpy as np
import pandas as pd
import pydicom
from p_tqdm import p_imap

def process_image(row):
    try:
        dcm = pydicom.dcmread(f"../../data/raw/rsna-intracranial-hemorrhage-detection/stage_2_train/{row[1]}.dcm")
        return row[1], dcm.pixel_array, row[2:]
    except Exception:
        # pydicom occasionally fails to read a file; signal that with Nones
        return None, None, None

# Transforming the manifest such that each row is one image and each column is a label.
manifest = pd.read_csv("../../data/interim/manifest.csv")
split_id = manifest["ID"].str.rsplit('_', n=1, expand=True)
manifest["ID"] = split_id[0]
manifest["Subtype"] = split_id[1]
manifest = manifest.pivot(index="ID", columns="Subtype", values="Label").reset_index().rename_axis(None, axis=1)

# `cores` is assumed to be an int defined earlier in the script
processed_images = p_imap(process_image, list(manifest.itertuples()), num_cpus=cores)
ids = []
labels = []
for id, pixels, labels_ in processed_images:
    # Rarely pydicom fails to read an image, discard it if that's the case.
    if id is None and pixels is None and labels_ is None: continue
    # Rarely an image is of a different size than the overwhelming majority, discard it if that's the case.
    if pixels.shape != (512, 512): continue

    np.save(f"../../data/processed/images/{id}.npy", pixels)
    ids.append(id)
    labels.append(labels_)

print(f"Processed {len(ids)} images successfully.")
np.save("../../data/processed/ids.npy", np.array(ids))
np.save("../../data/processed/Y.npy", np.array(labels))