Exemplo n.º 1
0
def get_gdoc(gdoc_url, output_folder):
    """Download a Google Docs document as HTML into ``output_folder``.

    The document ID is the path segment that follows
    ``docs.google.com/document/d/`` in ``gdoc_url``. The matching entry is
    fetched through the authorized client and its content is written to
    ``<output_folder>/<doc_id>.htm``.

    Args:
        gdoc_url: URL of the document. Surrounding whitespace (for example
            the newline left on a line read from a file) is ignored, and a
            query string or fragment directly after the ID is not treated
            as part of the ID.
        output_folder: Directory that receives the ``.htm`` file; it is
            created when it does not exist yet.

    Returns:
        A ``(resource_id, title)`` tuple read from the fetched entry.

    Raises:
        SystemExit: If no document ID can be found in ``gdoc_url``
            (exit status 1, after printing an error message).
        OSError: If ``output_folder`` does not exist and cannot be created.
    """
    # Validate the URL first so that a bad URL fails before any client is
    # requested.
    match_doc_id = re.match(
        r'^.*docs\.google\.com/document/d/([^/?#\s]+).*$', gdoc_url.strip())
    if not match_doc_id:
        print('Error matching doc id in get_gdoc')
        raise SystemExit(1)
    doc_id = match_doc_id.group(1)

    gd_client = getAuthorizedGoogleDocsClient()

    # Make sure the output directory exists. An OSError is only ignored
    # when the directory is already there; anything else is re-raised.
    try:
        os.mkdir(output_folder)
    except OSError:
        if not os.path.isdir(output_folder):
            raise

    # Look the document up by its resource key.
    gd_entry = gd_client.get_resource_by_id('document:' + doc_id)

    # Ask the API for the content URL instead of reusing gdoc_url.
    html = gd_client._get_content(gd_entry.content.src)

    html_filename = os.path.join(output_folder, doc_id + '.htm')
    with open(html_filename, 'w') as html_file:
        html_file.write(html)
    return (gd_entry.resource_id.text, gd_entry.title.text)
Exemplo n.º 2
0
def main():
    """Split every Google Doc listed in the testbed URL file and re-upload it.

    For each non-empty, non-comment line of the URL file the document is
    downloaded with ``get_gdoc``, its HTML is joined into a single line,
    split with ``divide_sections``, and every section is uploaded as its own
    document titled ``"<original title>, Section <n>"``. The temporary
    section files are removed after the upload attempt.

    Raises:
        SystemExit: With status 1 when Java is not available, or with
            status 0 when an upload is interrupted from the keyboard.
    """
    output_dir = './gdoc_output'

    # keep sure Java is installed (needed for Jing)
    if not java_installed():
        print("ERROR: Could not find Java. Please keep sure that Java is installed and available.")
        raise SystemExit(1)

    # delete the contents of the testbed folder
    delete_all_contents_of_folder(output_dir)

    # The returned client is not used in this function; the call is kept in
    # case it performs the login up front -- TODO confirm.
    getAuthorizedGoogleDocsClient()

    # Read the list of public document URLs (the testbed for GDocs) and
    # close the file right away.
    url_path = os.path.join(TESTBED_INPUT_DIR, TESTBED_INPUT_URLS_FILE)
    with open(url_path) as url_file:
        urls = [line.strip() for line in url_file]

    for url in urls:
        # Skip blank lines and comments.
        if not url or url.startswith('#'):
            continue

        rid, original_title = get_gdoc(url, output_dir)
        # rid[9:] drops the first nine characters, which is the length of
        # the 'document:' prefix get_gdoc uses for its lookup key;
        # presumably the returned resource id carries the same prefix.
        doc_name = rid[9:]

        # Join the downloaded HTML into one line by removing the newline
        # that terminates each line.
        html_filename = os.path.join(output_dir, doc_name + '.htm')
        with open(html_filename, 'r') as html_file:
            inline_html = ''.join(
                line[:-1] if line.endswith('\n') else line
                for line in html_file)

        for sect_num, sect in enumerate(divide_sections(inline_html)):
            sect_filename = os.path.join(
                output_dir, doc_name + '_sect_' + str(sect_num) + '.htm')
            with open(sect_filename, 'w') as sect_file:
                sect_file.write(sect)

            try:
                upload_doc(sect_filename, 'text/html',
                           original_title + ', Section ' + str(sect_num))
            except KeyboardInterrupt:
                raise SystemExit()
            except Exception:
                # Best effort: report the failed section and carry on.
                print('Error uploading section ' + str(sect_num) +
                      '. Are there images in there?')

            os.remove(sect_filename)

    print_status('Finished!')
Exemplo n.º 3
0
def upload_doc(filename, handle, new_title):
    """Upload a local file to Google Docs as a new document.

    Args:
        filename: Path of the file to upload.
        handle: Content type handed to ``MediaSource.SetFileHandle`` (for
            example ``'text/html'``). Despite its name this is not a file
            object.
        new_title: Title given to the created document.

    Returns:
        The text of the created resource's ``resource_id``.
    """
    gd_client = getAuthorizedGoogleDocsClient()
    document = gdata.docs.data.Resource(type='document', title=new_title)
    media = gdata.data.MediaSource()
    media.SetFileHandle(filename, handle)
    try:
        document = gd_client.CreateResource(document, media=media)
    finally:
        # Per the gdata MediaSource implementation, SetFileHandle opens
        # `filename` itself and keeps the handle on the media object. Close
        # it once the upload attempt is over so that callers can delete
        # the file afterwards.
        file_handle = getattr(media, 'file_handle', None)
        if file_handle is not None:
            file_handle.close()
    return document.resource_id.text
Exemplo n.º 4
0
def upload_doc(filename, handle, new_title):
    """Create a new Google Docs document from a local file.

    Args:
        filename: Path of the file whose content is uploaded.
        handle: Content type passed to ``MediaSource.SetFileHandle`` (for
            example ``'text/html'``). The name is historical; it is not a
            file object.
        new_title: Title of the document that is created.

    Returns:
        The text of the created resource's ``resource_id``.
    """
    gd_client = getAuthorizedGoogleDocsClient()
    document = gdata.docs.data.Resource(type='document', title=new_title)
    media = gdata.data.MediaSource()
    media.SetFileHandle(filename, handle)
    try:
        document = gd_client.CreateResource(document, media=media)
    finally:
        # Per the gdata MediaSource implementation, SetFileHandle opens
        # `filename` and stores the handle on the media object. Release it
        # here, whether or not the upload succeeded, so the file is not
        # left open.
        file_handle = getattr(media, 'file_handle', None)
        if file_handle is not None:
            file_handle.close()
    return document.resource_id.text
Exemplo n.º 5
0
def get_gdoc(gdoc_url, output_folder):
    """Fetch a Google Docs document and store its HTML in ``output_folder``.

    The document ID is the path segment that follows
    ``docs.google.com/document/d/`` in ``gdoc_url``. The entry is looked up
    through the authorized client and its content is saved as
    ``<output_folder>/<doc_id>.htm``.

    Args:
        gdoc_url: URL of the document. Leading and trailing whitespace
            (such as the newline of a line read from a file) is ignored,
            and a query string or fragment right after the ID is not
            treated as part of the ID.
        output_folder: Directory for the ``.htm`` file; created when it is
            missing.

    Returns:
        A ``(resource_id, title)`` tuple read from the fetched entry.

    Raises:
        SystemExit: If ``gdoc_url`` contains no document ID (exit status 1,
            after printing an error message).
        OSError: If ``output_folder`` does not exist and cannot be created.
    """
    # Check the URL before requesting a client so that a malformed URL
    # fails immediately.
    match_doc_id = re.match(
        r'^.*docs\.google\.com/document/d/([^/?#\s]+).*$', gdoc_url.strip())
    if not match_doc_id:
        print('Error matching doc id in get_gdoc')
        raise SystemExit(1)
    doc_id = match_doc_id.group(1)

    gd_client = getAuthorizedGoogleDocsClient()

    # Create the output directory if needed. An OSError is tolerated only
    # when the directory already exists; other failures are re-raised.
    try:
        os.mkdir(output_folder)
    except OSError:
        if not os.path.isdir(output_folder):
            raise

    # get the Google Docs Entry
    gd_entry = gd_client.get_resource_by_id('document:' + doc_id)

    # Use the content URL reported by the API rather than gdoc_url.
    html = gd_client._get_content(gd_entry.content.src)

    html_filename = os.path.join(output_folder, doc_id + '.htm')
    with open(html_filename, 'w') as html_file:
        html_file.write(html)
    return (gd_entry.resource_id.text, gd_entry.title.text)
Exemplo n.º 6
0
def main():
    """Download, split and re-upload every document in the testbed URL file.

    Each non-empty, non-comment line of the URL file is passed to
    ``get_gdoc``. The downloaded HTML is joined into one line, divided with
    ``divide_sections``, and each section is uploaded as a separate document
    titled ``"<original title>, Section <n>"``. Temporary section files are
    deleted after the upload attempt.

    Raises:
        SystemExit: With status 1 when Java cannot be found, or with
            status 0 when an upload is interrupted from the keyboard.
    """
    output_dir = './gdoc_output'

    # keep sure Java is installed (needed for Jing)
    if not java_installed():
        print("ERROR: Could not find Java. Please keep sure that Java is installed and available.")
        raise SystemExit(1)

    # delete the contents of the testbed folder
    delete_all_contents_of_folder(output_dir)

    # The client object itself is unused below; the call is retained in
    # case it triggers the login before any document is processed -- TODO
    # confirm.
    getAuthorizedGoogleDocsClient()

    # Load the GDocs public document URLs (the testbed) and release the
    # file before doing any network work.
    url_path = os.path.join(TESTBED_INPUT_DIR, TESTBED_INPUT_URLS_FILE)
    with open(url_path) as url_file:
        urls = [line.strip() for line in url_file]

    for url in urls:
        # ignore blank lines and comments
        if not url or url.startswith('#'):
            continue

        rid, original_title = get_gdoc(url, output_dir)
        # rid[9:] skips nine characters, the length of the 'document:'
        # prefix that get_gdoc prepends to the ID for its lookup key;
        # presumably the returned resource id has the same prefix.
        doc_name = rid[9:]

        # Collapse the HTML into a single line by dropping the newline
        # that ends each line.
        html_filename = os.path.join(output_dir, doc_name + '.htm')
        with open(html_filename, 'r') as html_file:
            inline_html = ''.join(
                line[:-1] if line.endswith('\n') else line
                for line in html_file)

        for sect_num, sect in enumerate(divide_sections(inline_html)):
            sect_filename = os.path.join(
                output_dir, doc_name + '_sect_' + str(sect_num) + '.htm')
            with open(sect_filename, 'w') as sect_file:
                sect_file.write(sect)

            try:
                upload_doc(sect_filename, 'text/html',
                           original_title + ', Section ' + str(sect_num))
            except KeyboardInterrupt:
                raise SystemExit()
            except Exception:
                # Best effort: a failed section is reported, not fatal.
                print('Error uploading section ' + str(sect_num) +
                      '. Are there images in there?')

            os.remove(sect_filename)

    print_status('Finished!')