示例#1
0
def _remove_stale_docs(docs_dir, keep):
    """
    Delete every entry of docs_dir whose name is not listed in keep
    """
    for entry in os.listdir(docs_dir):
        if entry in keep:
            continue
        entry_path = os.path.join(docs_dir, entry)
        # shutil.rmtree() refuses symbolic links, so only real directories
        # go through it; files and links of any kind are unlinked instead.
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            shutil.rmtree(entry_path)
        else:
            os.remove(entry_path)


def main():
    """
    Export and convert all found documents

    Clears previous output from the 'docs' directory (the 'js', 'css' and
    'api_reference' entries are kept). Then, for every document returned by
    get_index(), downloads the document and its drawings into temporary
    storage, runs prepare_docx() on them and hands the result to convert()
    together with an output path inside 'docs'.
    """
    # The output directory may not exist yet (e.g. on a first run); listing
    # a missing directory would raise FileNotFoundError.
    os.makedirs('docs', exist_ok=True)
    # Remove old docs before converting
    _remove_stale_docs('docs', keep=('js', 'css', 'api_reference'))

    documents = get_index()
    for document_id, filename in documents.items():
        print(f'Processing https://docs.google.com/document/d/{document_id}/')
        # NOTE(review): the temporary file is passed on by name while it is
        # still open; presumably the helpers reopen it, which is not
        # portable to every platform - confirm if Windows matters.
        with tempfile.NamedTemporaryFile() as document_file, \
                tempfile.TemporaryDirectory() as drawings_dir:
            download_document(document_id, file_name=document_file.name)
            download_drawings(document_id, drawings_dir)
            document = prepare_docx(document_file.name, drawings_dir)
        # The temporary file and directory are already removed at this
        # point; only the prepared document object is still needed.
        output_path = os.path.join('docs', filename)
        convert(document, output_path)

    print('Done.')
示例#2
0
def get_index() -> dict:
    """
    Find all Google Docs links in documentation index

    Downloads the index document as HTML and scans its links. Every link
    whose target matches GOOGLE_DOC_URL_REGEXP contributes one entry: the
    document ID (first regexp group) mapped to a '.md' file name built from
    the slugified link text. Only the first link to a given document is
    used. The resulting mapping is also written to 'index.json' in the
    current working directory.

    Returns:
        dict mapping document ID to output file name. The index document
        itself is always included as 'index.md'.
    """
    document_map = {GOOGLE_DOC_INDEX_ID: 'index.md'}
    index_html = download_document(GOOGLE_DOC_INDEX_ID, fmt='html')
    soup = BeautifulSoup(index_html, 'html.parser')
    # Only anchors that carry an href can be links; an <a> tag without one
    # (e.g. a named anchor) would raise KeyError on elem['href'].
    for elem in soup.find_all('a', href=True):
        match = GOOGLE_DOC_URL_REGEXP.search(elem['href'])
        if not match:
            continue
        document_id = match.group(1)
        if document_id in document_map:
            # Already registered (or the index itself): keep the first name
            continue
        if document_id == '1Ia4zYmkB6I6IbWPRlcZYYuMS1ZI55T99dp9LiMJqXCE':
            # Temporarily not available
            continue
        document_title = elem.get_text()
        if document_title == 'H':
            # Bad markup here
            document_title = 'How Inventories Work'
        document_slug = slugify(document_title, separator='_')
        filename = f'{document_slug}.md'
        document_map[document_id] = filename

    with open('index.json', 'w', encoding='utf-8') as index_json:
        json.dump(document_map, index_json, indent=4)

    print(f'Found {len(document_map)} documents')
    return document_map
示例#3
0
def main():
    """
    Download every indexed document and convert it into the 'docs' folder

    For each (document ID, file name) pair returned by get_index(), the
    document and its drawings are fetched into temporary storage, combined
    by prepare_docx() and passed to convert() with the target path.
    """
    for document_id, target_name in get_index().items():
        print(f'Processing https://docs.google.com/document/d/{document_id}/')
        destination = os.path.join('docs', target_name)
        with tempfile.NamedTemporaryFile() as docx_file:
            with tempfile.TemporaryDirectory() as drawings_path:
                download_document(document_id, file_name=docx_file.name)
                download_drawings(document_id, drawings_path)
                prepared = prepare_docx(docx_file.name, drawings_path)
        # Conversion runs after the temporary file and directory are gone
        convert(prepared, destination)

    print('Done.')