Python download_document示例

编程语言: Python

命名空间/包名称: export

方法/功能: download_document

hotexamples.com的示例: 3

Python download_document - 共找到 3 个示例。以下是从开源项目中提取的、评分最高的 export.download_document 真实 Python 代码示例。您可以为示例评分，帮助我们提高示例质量。

示例#1

显示文件

文件： crawl.py 项目： avalenn/beancount-docs

def main():
    """
    Clear previously generated output, then export and convert every document.

    Every entry inside ``docs`` is deleted except ``js``, ``css`` and
    ``api_reference``. Afterwards, for each document returned by
    ``get_index()``, a temporary file and a temporary drawings directory are
    handed to the download helpers, the result of ``prepare_docx`` is kept,
    and it is converted to ``docs/<filename>``.
    """
    preserved = ('js', 'css', 'api_reference')
    # Stale output has to go before the fresh conversion starts
    for entry in os.listdir('docs'):
        if entry not in preserved:
            entry_path = os.path.join('docs', entry)
            if not os.path.isfile(entry_path):
                shutil.rmtree(entry_path)
            else:
                os.remove(entry_path)

    for document_id, filename in get_index().items():
        print(f'Processing https://docs.google.com/document/d/{document_id}/')
        # The temporary file and directory are only needed until the
        # document has been prepared; conversion happens after cleanup.
        with tempfile.NamedTemporaryFile() as tmp_file:
            with tempfile.TemporaryDirectory() as drawings_dir:
                download_document(document_id, file_name=tmp_file.name)
                download_drawings(document_id, drawings_dir)
                document = prepare_docx(tmp_file.name, drawings_dir)
        convert(document, os.path.join('docs', filename))

    print('Done.')

示例#2

显示文件

文件： crawl.py 项目： avalenn/beancount-docs

def get_index() -> dict:
    """
    Find all Google Docs links in the documentation index.

    Fetches the index document as HTML, scans its hyperlinks for Google Docs
    URLs and assigns every newly seen document a Markdown file name derived
    from the link text. The resulting mapping is also written to
    ``index.json`` in the current working directory.

    Returns:
        Mapping of document ID to output file name. The index document
        itself is always present, mapped to ``index.md``.
    """
    document_map = {GOOGLE_DOC_INDEX_ID: 'index.md'}
    index_html = download_document(GOOGLE_DOC_INDEX_ID, fmt='html')
    soup = BeautifulSoup(index_html, 'html.parser')
    # href=True restricts the search to anchors that actually carry an href;
    # indexing elem['href'] on a bare <a> (e.g. a named anchor) would raise
    # KeyError and abort the whole crawl.
    for elem in soup.find_all('a', href=True):
        match = GOOGLE_DOC_URL_REGEXP.search(elem['href'])
        if not match:
            continue
        document_id = match.group(1)
        if document_id in document_map:
            # Only the first link to a document decides its file name
            continue
        if document_id == '1Ia4zYmkB6I6IbWPRlcZYYuMS1ZI55T99dp9LiMJqXCE':
            # Temporarily not available
            continue
        document_title = elem.get_text()
        if document_title == 'H':
            # Bad markup here
            document_title = 'How Inventories Work'
        document_slug = slugify(document_title, separator='_')
        filename = f'{document_slug}.md'
        document_map[document_id] = filename

    with open('index.json', 'w') as index_json:
        json.dump(document_map, index_json, indent=4)

    print(f'Found {len(document_map)} documents')
    return document_map

示例#3

显示文件

def main():
    """
    Export and convert every document listed by ``get_index()``.

    For each document a temporary file and a temporary drawings directory
    are handed to the download helpers; the result of ``prepare_docx`` is
    then converted to ``docs/<filename>`` once both temporaries are closed.
    """
    for document_id, filename in get_index().items():
        print(f'Processing https://docs.google.com/document/d/{document_id}/')
        # Temporaries are released before conversion; only the prepared
        # document object is carried out of the nested contexts.
        with tempfile.NamedTemporaryFile() as tmp_file:
            with tempfile.TemporaryDirectory() as drawings_dir:
                download_document(document_id, file_name=tmp_file.name)
                download_drawings(document_id, drawings_dir)
                document = prepare_docx(tmp_file.name, drawings_dir)
        convert(document, os.path.join('docs', filename))

    print('Done.')