示例#1
0
def process_file(fjson_storage_path):
    """
    Read an fjson file from build media storage and parse it for indexing.

    :param fjson_storage_path: path of the fjson file inside the storage
        backend named by ``settings.RTD_BUILD_MEDIA_STORAGE``.
    :returns: dict with the keys ``path``, ``title``, ``sections`` and
        ``domain_data``. A piece that is missing from the file keeps its
        empty default (``''``, ``''``, ``[]`` and ``{}`` respectively) and
        an info message is logged for it.
    :raises RuntimeError: if ``settings.RTD_BUILD_MEDIA_STORAGE`` is not set.
    :raises IOError: if the file cannot be read (logged, then re-raised).
    """
    if not settings.RTD_BUILD_MEDIA_STORAGE:
        # Same text goes to the log and to the exception.
        message = (
            'RTD_BUILD_MEDIA_STORAGE is missing - Not updating intersphinx data'
        )
        log.warning(message)
        raise RuntimeError(message)

    storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()

    log.debug('Processing JSON file for indexing: %s', fjson_storage_path)

    try:
        with storage.open(fjson_storage_path, mode='r') as f:
            file_contents = f.read()
    except IOError:
        log.info('Unable to read file: %s', fjson_storage_path)
        raise

    # Malformed JSON is not handled here; the decode error reaches the caller.
    data = json.loads(file_contents)

    # Defaults returned for any piece the file does not provide.
    sections = []
    path = ''
    title = ''
    domain_data = {}

    if 'current_page_name' in data:
        path = data['current_page_name']
    else:
        log.info('Unable to index file due to no name %s', fjson_storage_path)

    if data.get('body'):
        body = PyQuery(data['body'])
        # Each helper receives its own clone of the parsed body (presumably
        # so that one helper cannot alter the tree the other one reads).
        sections.extend(
            generate_sections_from_pyquery(body.clone(), fjson_storage_path))
        domain_data = generate_domains_data_from_pyquery(
            body.clone(), fjson_storage_path)
    else:
        log.info('Unable to index content for: %s', fjson_storage_path)

    if 'title' in data:
        # Reduce the title markup to plain text and drop the '¶' character.
        title = PyQuery(data['title']).text().replace('¶', '').strip()
    else:
        log.info('Unable to index title for: %s', fjson_storage_path)

    return {
        'path': path,
        'title': title,
        'sections': sections,
        'domain_data': domain_data,
    }
示例#2
0
def pq_remove_nodes(
    pq: PyQuery,
    css_remove: Union[str, list],
) -> PyQuery:
    """
    Return a clone of ``pq`` after calling ``remove`` for each selector.

    The object passed in is not modified; all removals are applied to a
    clone of it.

    :param pq: document to copy and strip.
    :param css_remove: a single selector string, or a collection of them.
    :returns: the stripped clone.
    """
    # A bare string is treated as a one-element collection of selectors.
    selectors = [css_remove] if isinstance(css_remove, str) else css_remove

    stripped = pq.clone()
    for selector in selectors:
        stripped.remove(selector)

    return stripped