def bio_to_brat_parallel_wrapper(file_names, n_cores):
    """Parallel wrapper for article_list_bio_to_brat

    Args:
        file_names (list of lists): elements: [PosixPath, PosixPath, PosixPath, PosixPath]
            paths to the input text, input labels, output text, and output annotation files
        n_cores (int): number of python processes to use (multiprocessing package)
    """
    list_segments = chunk_list(file_names, n_cores)
    with Pool(n_cores) as p:
        p.map(article_list_bio_to_brat, list_segments)
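# `chunk_list` (defined elsewhere in this module) splits the work list into one
# contiguous segment per worker process. A minimal sketch of the assumed
# behaviour, for reference only (hypothetical, not the module's implementation):
#
#     import math
#
#     def chunk_list(items, n_chunks):
#         """Split `items` into at most `n_chunks` contiguous, roughly equal segments."""
#         chunk_size = max(1, math.ceil(len(items) / n_chunks))
#         return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]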
def parse_article_list_parallel_wrapper(in_list, n_cores=4):
    """Parallel wrapper around parse_article_list

    Args:
        in_list (list of [in_path, out_path]): pairs of paths to the input JATS file
            and the location for the output plain txt
        n_cores (int, optional): number of parallel python processes to spawn
            (multiprocessing package). Defaults to 4.

    Returns:
        int: total number of parsing errors across all processes
    """
    list_segments = chunk_list(in_list, n_cores)
    with Pool(n_cores) as p:
        error_counts = p.map(parse_article_list, list_segments)
    return sum(error_counts)
def parse_file_list_parallel_wrapper(in_list, out_path='.', n_cores=4):
    """Parallel wrapper for parse_file_list

    Args:
        in_list (list of PosixPaths): List of input files
        out_path (str, optional): Directory in which to write the outputs. Defaults to '.'.
        n_cores (int, optional): Number of python processes to use. Defaults to 4.

    Returns:
        int: Number of articles extracted
    """
    list_segments = chunk_list(in_list, n_cores)
    fct_to_execute = partial(parse_file_list, out_path=out_path)
    with Pool(n_cores) as p:
        n_articles = p.map(fct_to_execute, list_segments)
    return sum(n_articles)
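# Example usage (the directory names and glob pattern below are illustrative
# placeholders, not part of the module):
#
#     from pathlib import Path
#
#     jats_files = sorted(Path('data/jats').glob('*.xml'))
#     n_articles = parse_file_list_parallel_wrapper(jats_files, out_path='data/txt', n_cores=8)
#     print(f'{n_articles} articles extracted')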
def preprocess_articles_parallel_wrapper(file_list, n_cores, process_unicode=True,
                                         replace_math=True, correct=True, corr_cite=True):
    """Parallel wrapper for preprocess_articles

    Args:
        file_list (list of [input filename, output filename]): pairs of file names
            to read from and to write to
        n_cores (int): number of python processes to use (multiprocessing package)
        process_unicode (bool, optional): replace unicode characters. Defaults to True.
        replace_math (bool, optional): replace math equations. Defaults to True.
        correct (bool, optional): correct string errors. Defaults to True.
        corr_cite (bool, optional): correct citation errors. Defaults to True.
    """
    list_segments = chunk_list(file_list, n_cores)
    fct_to_execute = partial(preprocess_articles, process_unicode=process_unicode,
                             replace_math=replace_math, correct=correct, corr_cite=corr_cite)
    with Pool(n_cores) as p:
        p.map(fct_to_execute, list_segments)
def brat_to_bio_parallel_wrapper(file_names, n_cores, process_unicode=True,
                                 replace_math=True, correct=True, corr_cite=True):
    """Parallel wrapper for article_list_brat_to_bio

    Args:
        file_names (list of lists): elements: [PosixPath, PosixPath, PosixPath]
            paths to the input text, input annotation, and output base path
        n_cores (int): number of python processes to use (multiprocessing package)
        process_unicode (bool, optional): replace unicode characters. Defaults to True.
        replace_math (bool, optional): replace math equations. Defaults to True.
        correct (bool, optional): correct string errors. Defaults to True.
        corr_cite (bool, optional): correct citation errors. Defaults to True.
    """
    list_segments = chunk_list(file_names, n_cores)
    fct_to_execute = partial(article_list_brat_to_bio, process_unicode=process_unicode,
                             replace_math=replace_math, correct=correct, corr_cite=corr_cite)
    with Pool(n_cores) as p:
        p.map(fct_to_execute, list_segments)
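# Example usage (directory layout and the .ann suffix are assumptions for
# illustration only):
#
#     from pathlib import Path
#
#     txt_files = sorted(Path('data/brat').glob('*.txt'))
#     file_names = [[txt, txt.with_suffix('.ann'), Path('data/bio') / txt.stem]
#                   for txt in txt_files]
#     brat_to_bio_parallel_wrapper(file_names, n_cores=4, replace_math=False)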