Example no. 1
from typing import List

def reorder_vocabulary(vocabulary_path: str) -> None:
    """Sort the words inside each '#'-headed section of a vocabulary file."""
    with open(vocabulary_path, 'r') as file_handler:
        raw_lines = file_handler.readlines()
    # Split the file into sections: a '#' comment line starts a new section.
    sections: List[List[str]] = []
    current_section: List[str] = []
    for line in raw_lines:
        processed_line = line.strip()
        if not processed_line:
            continue
        if processed_line.startswith('#') and current_section:
            sections.append(current_section)
            current_section = []
        current_section.append(processed_line)
    if current_section:
        sections.append(current_section)
    # Keep the comment lines first, sort the remaining words, and separate
    # sections with a blank line (except after the last one).
    sorted_sections: List[List[str]] = []
    for section_num, section in enumerate(sections, 1):
        sorted_sections.append(
            [f'{r}\n' for r in section if r.startswith('#')]
            + sorted(f'{r}\n' for r in section if not r.startswith('#'))
            + (['\n'] if section_num < len(sections) else []),
        )

    with open(vocabulary_path, 'w') as file_handler:
        file_handler.writelines(flat(sorted_sections))  # flat: see helper sketch below
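
For illustration, reorder_vocabulary rewrites the file in place. A hypothetical vocabulary file containing

    # animals
    zebra
    cat

    # colors
    red
    blue

comes back with each section still headed by its comment, the words inside it sorted (cat before zebra, blue before red), and a single blank line between sections.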
Example no. 2
import sqlite3
from typing import Set

def load_obscene_words(db_path: str) -> Set[str]:
    """Fetch all obscene words from the SQLite database as a set."""
    connection = sqlite3.connect(db_path)
    cursor = connection.cursor()
    # OBSCENE_BASE_TABLE_NAME is a module-level constant with the table name;
    # fetchall() returns one-element tuples, which flat() unwraps.
    rows = cursor.execute(f'SELECT word FROM {OBSCENE_BASE_TABLE_NAME}').fetchall()
    connection.close()
    return set(flat(rows))
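
A minimal usage sketch, assuming OBSCENE_BASE_TABLE_NAME names a one-column table called obscene_words (both the constant's value and the schema are assumptions, not shown in the example):

import sqlite3

OBSCENE_BASE_TABLE_NAME = 'obscene_words'  # assumed value of the module constant

connection = sqlite3.connect('words.db')
connection.execute(f'CREATE TABLE IF NOT EXISTS {OBSCENE_BASE_TABLE_NAME} (word TEXT)')
connection.executemany(
    f'INSERT INTO {OBSCENE_BASE_TABLE_NAME} (word) VALUES (?)',
    [('foo',), ('bar',)],
)
connection.commit()
connection.close()

assert load_obscene_words('words.db') == {'foo', 'bar'}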
Example no. 3
import collections
import functools
import math
import multiprocessing
import os
from typing import Callable, DefaultDict, List

def extract_all_constants_from_path(
    path: str,
    exclude: List[str],
    process_dots: bool,
    processes_amount: int,
    verbosity: int = 0,
) -> List[str]:
    # Each extractor handles the file extensions listed alongside it.
    extractors = [
        (extract_from_python_src, ['py', 'pyi']),
        (extract_from_markdown, ['md']),
        (extract_from_html, ['html']),
        (extract_from_js, ['js', 'ts', 'tsx']),
        (extract_from_po, ['po']),
    ]

    # Invert the table: map each extension to every extractor that handles it.
    extension_to_extractor_mapping: DefaultDict[
        str, List[Callable]] = collections.defaultdict(list)
    for extractor, extensions in extractors:
        for extension in extensions:
            extension_to_extractor_mapping[extension].append(extractor)

    string_constants: List[str] = []

    for extension, extension_extractors in extension_to_extractor_mapping.items():
        if os.path.isdir(path):
            all_files = get_all_filepathes_recursively(path, exclude, extension)
        else:
            all_files = [path] if path.endswith(extension) else []
        if not process_dots:
            # Skip hidden files and files inside hidden directories.
            all_files = [
                f for f in all_files if '/.' not in f and not f.startswith('.')
            ]
        if not all_files:
            continue
        # Spread the files evenly across the worker processes.
        chunk_size = math.ceil(len(all_files) / processes_amount)
        with multiprocessing.Pool(processes_amount) as pool:
            new_strings = pool.map(
                functools.partial(
                    extract_all_constants_from_files,
                    extractors=extension_extractors,
                    verbosity=verbosity,
                ),
                chunks(all_files, chunk_size),
            )
        string_constants += flat(new_strings)
    return list(set(string_constants))  # deduplicate repeated constants
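
All three examples lean on a flat helper that is never shown, and Example no. 3 also calls a chunks helper. Minimal sketches consistent with that usage and with test_flat below (the signatures are assumptions):

from typing import Iterable, Iterator, List, TypeVar

T = TypeVar('T')

def flat(nested: Iterable[Iterable[T]]) -> List[T]:
    # Flatten exactly one level of nesting: [[1, 2], [3]] -> [1, 2, 3].
    return [item for sublist in nested for item in sublist]

def chunks(items: List[T], chunk_size: int) -> Iterator[List[T]]:
    # Yield consecutive slices of at most chunk_size items each.
    for start in range(0, len(items), chunk_size):
        yield items[start:start + chunk_size]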
def test_flat():
    assert flat([[1, 2], [3, 4], [5, 6]]) == list(range(1, 7))
    assert flat([['a', 'b'], ['c', 'd']]) == ['a', 'b', 'c', 'd']