Python get_gdoc示例，splitter.get_gdoc Python示例

示例#1

0

显示文件

文件： test_conversion.py 项目： oerpub/oerpub.remix

    def test_gdocs(self):
        """Convert the gdocs test documents to CNXML and diff them against stored output.

        Two sources of documents are processed, in this order:

        * every ``.doc`` file in ``test_folder_name + 'doc/'``, uploaded with
          ``upload_doc``;
        * every Google Docs URL listed in ``./test_files/gdocs/test_files``
          (lines starting with ``#`` are skipped), if that file can be opened.

        Each document is fetched with ``get_gdoc``, converted with
        ``gdocs_to_cnxml``, cleaned, validated and written to
        ``./test_files/gdocs/<name>.tmp``.  That file is compared with
        ``./test_files/gdocs/<name>.cnxml`` using the external ``diff`` program.
        Non-empty diff output is saved to ``<name>.diff``; otherwise any error
        output of ``diff`` is saved to ``<name>.err``.  Nothing is asserted:
        results are only reported with ``print``.
        """
        doc_dir = test_folder_name + 'doc/'

        # Read the optional URL list up front; a missing file is not an error.
        try:
            with open('./test_files/gdocs/test_files') as fp:
                url_lines = fp.readlines()
        except (IOError, OSError):
            url_lines = []
            print('No gdocs test file')

        doc_files = [f for f in os.listdir(doc_dir)
                     if os.path.splitext(f)[1] == '.doc']

        # Each job pairs a resource id with the base name of its result files.
        # Keeping the pair together means a failed upload can no longer shift
        # the names of the documents that follow it.
        jobs = []
        for d in doc_files:
            just_filename = os.path.splitext(os.path.basename(d))[0]
            try:
                rid = upload_doc(doc_dir + d, 'application/msword', just_filename)
            except KeyboardInterrupt:
                exit()
            except Exception:
                # Best effort: report the failure and carry on with the rest.
                print('Error uploading '+just_filename+' to gdocs')
            else:
                jobs.append((rid, just_filename))

        for url in url_lines:
            # Strip the line ending so it cannot end up inside the document id
            # when the URL has nothing after the id.
            url = url.strip()
            if url.startswith('#'):
                continue
            match_doc_id = re.match(r'^.*docs\.google\.com/document/d/([^/]+).*$', url)
            if match_doc_id:
                doc_id = match_doc_id.group(1)
                jobs.append(('document:' + doc_id, doc_id))

        for rid, filename in jobs:
            valid_filename = './test_files/gdocs/'+filename+'.cnxml'
            output_filename = './test_files/gdocs/'+filename+'.tmp'
            diff_filename = './test_files/gdocs/'+filename+'.diff'
            err_filename = './test_files/gdocs/'+filename+'.err'

            # rid[9:] drops the 9-character 'document:' prefix.
            # NOTE(review): assumes upload_doc also returns ids with that prefix - confirm.
            gdoc_url = construct_url(rid[9:])
            rid, original_title = get_gdoc(gdoc_url, './test_files/gdocs')
            html_filename = './test_files/gdocs/'+rid[9:]+'.htm'
            with open(html_filename, 'r') as html_file:
                html = html_file.read()
            cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
            cnxml = clean_cnxml(cnxml)
            validate_cnxml(cnxml)

            with open(output_filename, 'w') as output:
                output.write(cnxml)
            remove_ids(output_filename)
            os.remove(html_filename)

            # stderr must be piped as well, otherwise communicate() returns
            # None for it and the error branch below can never run.
            process = subprocess.Popen(['diff', valid_filename, output_filename],
                                       shell=False,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            diff_out, diff_err = process.communicate()

            # The pipes deliver raw bytes, so the report files are opened in
            # binary mode.
            if diff_out:
                with open(diff_filename, 'wb') as diff_output:
                    diff_output.write(diff_out)
                print('Differences in the testing of gdoc '+filename+', information on those differences has been placed in '+diff_filename)
            elif diff_err:
                with open(err_filename, 'wb') as err_output:
                    err_output.write(diff_err)
                print('Error(s) occurred while attempting to test for differences in CNXML output of gdoc '+filename+', information on these errors are in '+err_filename)

示例#2

0

显示文件

文件： convert_gdocs_cnxml.py 项目： oerpub/oerpub.remix

from oerpub.rhaptoslabs.html_gdocs2cnxml.gdocs2cnxml import gdocs_to_cnxml
from utils import clean_cnxml, escape_system
from test_conversion import validate_cnxml, remove_ids

# Convert a single Google Docs document to CNXML and store the result in
# 'valid.cnxml' in the current directory.
url = 'https://docs.google.com/document/d/1tiZR1fhBl3ZQ_UaQ5sRDA3gSs_7LjgtTITkBAGjuTpI/edit'
# Alternative test document:
#url='https://docs.google.com/document/d/1Gw9j1J-_d5YQoq6SIc3Az2hiVlwtvVcJkXfYKDR_zBM/edit'

match_doc_id = re.match(r'^.*docs\.google\.com/document/d/([^/]+).*$', url)
if match_doc_id is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise ValueError('Not a Google Docs document URL: ' + url)
rid = 'document:' + match_doc_id.group(1)

print(rid)
# rid[9:] drops the 9-character 'document:' prefix again.
filename = rid[9:]
valid_filename = 'valid.cnxml'
gdoc_url = construct_url(rid[9:])
print(gdoc_url)
# NOTE(review): the original edit URL is passed here, not gdoc_url - confirm
# that this is intended.
rid, original_title = get_gdoc(url, './')
html_filename = './' + rid[9:] + '.htm'
with open(html_filename, 'r') as html_file:
    html = html_file.read()

cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
cnxml = clean_cnxml(cnxml)
validate_cnxml(cnxml)

# The context manager closes the file even if the write fails.
with open(valid_filename, 'w') as output:
    output.write(cnxml)

示例#3

0

显示文件

    def test_gdocs(self):
        """Convert the gdocs test documents to CNXML and diff them against stored output.

        Two sources of documents are processed, in this order:

        * every ``.doc`` file in ``test_folder_name + 'doc/'``, uploaded with
          ``upload_doc``;
        * every Google Docs URL listed in ``./test_files/gdocs/test_files``
          (lines starting with ``#`` are skipped), if that file can be opened.

        Each document is fetched with ``get_gdoc``, converted with
        ``gdocs_to_cnxml``, cleaned, validated and written to
        ``./test_files/gdocs/<name>.tmp``.  That file is compared with
        ``./test_files/gdocs/<name>.cnxml`` using the external ``diff`` program.
        Non-empty diff output is saved to ``<name>.diff``; otherwise any error
        output of ``diff`` is saved to ``<name>.err``.  Nothing is asserted:
        results are only reported with ``print``.
        """
        doc_dir = test_folder_name + 'doc/'

        # Read the optional URL list up front; a missing file is not an error.
        try:
            with open('./test_files/gdocs/test_files') as fp:
                url_lines = fp.readlines()
        except (IOError, OSError):
            url_lines = []
            print('No gdocs test file')

        doc_files = [
            f for f in os.listdir(doc_dir)
            if os.path.splitext(f)[1] == '.doc'
        ]

        # Each job pairs a resource id with the base name of its result files.
        # Keeping the pair together means a failed upload can no longer shift
        # the names of the documents that follow it.
        jobs = []
        for d in doc_files:
            just_filename = os.path.splitext(os.path.basename(d))[0]
            try:
                rid = upload_doc(doc_dir + d, 'application/msword',
                                 just_filename)
            except KeyboardInterrupt:
                exit()
            except Exception:
                # Best effort: report the failure and carry on with the rest.
                print('Error uploading ' + just_filename + ' to gdocs')
            else:
                jobs.append((rid, just_filename))

        for url in url_lines:
            # Strip the line ending so it cannot end up inside the document id
            # when the URL has nothing after the id.
            url = url.strip()
            if url.startswith('#'):
                continue
            match_doc_id = re.match(
                r'^.*docs\.google\.com/document/d/([^/]+).*$', url)
            if match_doc_id:
                doc_id = match_doc_id.group(1)
                jobs.append(('document:' + doc_id, doc_id))

        for rid, filename in jobs:
            valid_filename = './test_files/gdocs/' + filename + '.cnxml'
            output_filename = './test_files/gdocs/' + filename + '.tmp'
            diff_filename = './test_files/gdocs/' + filename + '.diff'
            err_filename = './test_files/gdocs/' + filename + '.err'

            # rid[9:] drops the 9-character 'document:' prefix.
            # NOTE(review): assumes upload_doc also returns ids with that
            # prefix - confirm.
            gdoc_url = construct_url(rid[9:])
            rid, original_title = get_gdoc(gdoc_url, './test_files/gdocs')
            html_filename = './test_files/gdocs/' + rid[9:] + '.htm'
            with open(html_filename, 'r') as html_file:
                html = html_file.read()
            cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
            cnxml = clean_cnxml(cnxml)
            validate_cnxml(cnxml)

            with open(output_filename, 'w') as output:
                output.write(cnxml)
            remove_ids(output_filename)
            os.remove(html_filename)

            # stderr must be piped as well, otherwise communicate() returns
            # None for it and the error branch below can never run.
            process = subprocess.Popen(
                ['diff', valid_filename, output_filename],
                shell=False,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            diff_out, diff_err = process.communicate()

            # The pipes deliver raw bytes, so the report files are opened in
            # binary mode.
            if diff_out:
                with open(diff_filename, 'wb') as diff_output:
                    diff_output.write(diff_out)
                print(
                    'Differences in the testing of gdoc ' + filename +
                    ', information on those differences has been placed in ' +
                    diff_filename)
            elif diff_err:
                with open(err_filename, 'wb') as err_output:
                    err_output.write(diff_err)
                print(
                    'Error(s) occurred while attempting to test for differences in CNXML output of gdoc '
                    + filename + ', information on these errors are in ' +
                    err_filename)

示例#4

0

显示文件

文件： convert_all.py 项目： oerpub/oerpub.remix

            rids.append('document:'+match_doc_id.group(1))
    fp.close()

# For every collected resource id: fetch the document, convert it to CNXML
# and write the result to ./test_files/gdocs/<filename>.cnxml.
# `rids` and `doc_files` are built earlier in the script, outside this excerpt.
count = 0
for rid in rids:
    print(rid)
    # The first len(doc_files) ids take their name from the matching entry in
    # doc_files; the remaining ids use the id itself (rid without its
    # 9-character prefix) as the file name.
    # NOTE(review): `count` is never advanced in the visible part of this loop,
    # so doc_files[0] would be used every time - confirm the increment exists
    # beyond this excerpt.
    if(count < len(doc_files)):
        filename = os.path.basename(doc_files[count])
        filename,ext = os.path.splitext(filename)
    else:
        filename = rid[9:]

    valid_filename='./test_files/gdocs/'+filename+'.cnxml'

    gdoc_url = construct_url(rid[9:])
    # get_gdoc returns a (rid, title) pair; the returned rid replaces the loop
    # variable and names the .htm file read below.
    # NOTE(review): presumably get_gdoc writes that .htm file into the given
    # directory - confirm.
    rid,original_title = get_gdoc(gdoc_url, './test_files/gdocs')
    html_filename = './test_files/gdocs/'+rid[9:]+'.htm'
    html_file = open(html_filename, 'r')
    try:
        html = html_file.read()
        html_file.flush()
    finally:
        # Close the handle even if reading fails.
        html_file.close()

    cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
    cnxml = clean_cnxml(cnxml)
    validate_cnxml(cnxml)

    # Store the converted document under the .cnxml name computed above.
    output=open(valid_filename,'w')
    output.write(cnxml)
    output.close()

示例#5

0

显示文件

文件： convert_all.py 项目： oerpub/oerpub.rhaptoslabs.swordpushweb

            rids.append("document:" + match_doc_id.group(1))
    fp.close()

# For every collected resource id: fetch the document, convert it to CNXML
# and write the result to ./test_files/gdocs/<filename>.cnxml.
# `rids` and `doc_files` are built earlier in the script, outside this excerpt.
count = 0
for rid in rids:
    print(rid)
    # The first len(doc_files) ids take their name from the matching entry in
    # doc_files; the remaining ids use the id itself (rid without its
    # 9-character prefix) as the file name.
    # NOTE(review): `count` is never advanced in the visible part of this loop,
    # so doc_files[0] would be used every time - confirm the increment exists
    # beyond this excerpt.
    if count < len(doc_files):
        filename = os.path.basename(doc_files[count])
        filename, ext = os.path.splitext(filename)
    else:
        filename = rid[9:]

    valid_filename = "./test_files/gdocs/" + filename + ".cnxml"

    gdoc_url = construct_url(rid[9:])
    # get_gdoc returns a (rid, title) pair; the returned rid replaces the loop
    # variable and names the .htm file read below.
    # NOTE(review): presumably get_gdoc writes that .htm file into the given
    # directory - confirm.
    rid, original_title = get_gdoc(gdoc_url, "./test_files/gdocs")
    html_filename = "./test_files/gdocs/" + rid[9:] + ".htm"
    html_file = open(html_filename, "r")
    try:
        html = html_file.read()
        html_file.flush()
    finally:
        # Close the handle even if reading fails.
        html_file.close()

    cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
    cnxml = clean_cnxml(cnxml)
    validate_cnxml(cnxml)

    # Store the converted document under the .cnxml name computed above.
    output = open(valid_filename, "w")
    output.write(cnxml)
    output.close()

示例#6

0

显示文件

文件： convert_gdocs_cnxml.py 项目： oerpub/oerpub.remix

from oerpub.rhaptoslabs.html_gdocs2cnxml.gdocs2cnxml import gdocs_to_cnxml
from utils import clean_cnxml, escape_system
from test_conversion import validate_cnxml, remove_ids

# Convert a single Google Docs document to CNXML and store the result in
# 'valid.cnxml' in the current directory.
url='https://docs.google.com/document/d/1tiZR1fhBl3ZQ_UaQ5sRDA3gSs_7LjgtTITkBAGjuTpI/edit'
# Alternative test document:
#url='https://docs.google.com/document/d/1Gw9j1J-_d5YQoq6SIc3Az2hiVlwtvVcJkXfYKDR_zBM/edit'

match_doc_id = re.match(r'^.*docs\.google\.com/document/d/([^/]+).*$', url)
if match_doc_id is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise ValueError('Not a Google Docs document URL: ' + url)
rid='document:'+match_doc_id.group(1)

print(rid)
# rid[9:] drops the 9-character 'document:' prefix again.
filename = rid[9:]
valid_filename='valid.cnxml'
gdoc_url = construct_url(rid[9:])
print(gdoc_url)
# NOTE(review): the original edit URL is passed here, not gdoc_url - confirm
# that this is intended.
rid,original_title = get_gdoc(url, './')
html_filename = './'+rid[9:]+'.htm'
with open(html_filename, 'r') as html_file:
    html = html_file.read()

cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
cnxml = clean_cnxml(cnxml)
validate_cnxml(cnxml)

# The context manager closes the file even if the write fails.
with open(valid_filename,'w') as output:
    output.write(cnxml)