Python upload_doc示例，splitter.upload_doc Python示例

示例#1

0

显示文件

文件： test_conversion.py 项目： oerpub/oerpub.remix

    def test_gdocs(self):
        """Convert Google Docs to CNXML and diff each result against a reference.

        Documents come from two sources:

        * every ``.doc`` file in ``test_folder_name + 'doc/'``, uploaded with
          ``upload_doc``;
        * document URLs listed one per line in
          ``./test_files/gdocs/test_files`` (lines starting with ``#`` are
          skipped).  If that file cannot be opened a notice is printed and
          only the uploaded documents are processed.

        Each document is fetched with ``get_gdoc``, converted with
        ``gdocs_to_cnxml``, passed through ``clean_cnxml`` and
        ``validate_cnxml``, and written to ``<name>.tmp``.  The external
        ``diff`` tool then compares ``<name>.cnxml`` with ``<name>.tmp``;
        anything it prints on stdout is saved to ``<name>.diff`` and,
        failing that, anything on stderr is saved to ``<name>.err``.

        The method makes no assertions about the diff itself: differences
        are only reported through ``print`` and the files above.
        """
        gdocs_dir = './test_files/gdocs/'
        doc_dir = test_folder_name + 'doc/'

        # (resource id, base filename) pairs.  Keeping the name next to the
        # id means a failed upload cannot shift the id/filename pairing of
        # the documents that follow it.
        targets = []

        # Probe the optional URL list up front so the notice is printed
        # before any upload starts.
        have_test_file = False
        try:
            fp = open(gdocs_dir + 'test_files')
            fp.close()
            have_test_file = True
        except (IOError, OSError):
            print('No gdocs test file')

        doc_files = [f for f in os.listdir(doc_dir)
                     if os.path.splitext(f)[1] == '.doc']
        for d in doc_files:
            just_filename = os.path.splitext(os.path.basename(d))[0]
            try:
                rid = upload_doc(doc_dir + d, 'application/msword', just_filename)
            except KeyboardInterrupt:
                exit()
            except Exception:
                # Best effort: report the failure and carry on with the rest.
                print('Error uploading '+just_filename+' to gdocs')
            else:
                targets.append((rid, just_filename))

        if have_test_file:
            with open(gdocs_dir + 'test_files') as fp:
                for url in fp:
                    if url.startswith('#'):
                        continue
                    match_doc_id = re.match(r'^.*docs\.google\.com/document/d/([^/]+).*$', url)
                    if match_doc_id:
                        doc_id = match_doc_id.group(1)
                        # Listed documents have no local name; use the id.
                        targets.append(('document:'+doc_id, doc_id))

        for rid, filename in targets:
            valid_filename = gdocs_dir+filename+'.cnxml'
            output_filename = gdocs_dir+filename+'.tmp'
            diff_filename = gdocs_dir+filename+'.diff'
            err_filename = gdocs_dir+filename+'.err'

            # rid[9:] drops the 9-character 'document:' prefix.
            gdoc_url = construct_url(rid[9:])
            rid, original_title = get_gdoc(gdoc_url, './test_files/gdocs')
            html_filename = gdocs_dir+rid[9:]+'.htm'
            with open(html_filename, 'r') as html_file:
                html = html_file.read()
            cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
            cnxml = clean_cnxml(cnxml)
            validate_cnxml(cnxml)

            with open(output_filename, 'w') as output:
                output.write(cnxml)
            remove_ids(output_filename)
            os.remove(html_filename)

            # stderr must be piped as well, otherwise communicate() always
            # returns None for it and the error branch below is dead code.
            process = subprocess.Popen(['diff', valid_filename, output_filename],
                                       shell=False,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            diff_text, err_text = process.communicate()

            # The pipes deliver raw bytes, so the reports are written in
            # binary mode.
            if diff_text:
                with open(diff_filename, 'wb') as diff_output:
                    diff_output.write(diff_text)
                print('Differences in the testing of gdoc '+filename+', information on those differences has been placed in '+diff_filename)
            elif err_text:
                with open(err_filename, 'wb') as err_output:
                    err_output.write(err_text)
                print('Error(s) occurred while attempting to test for differences in CNXML output of gdoc '+filename+', information on these errors are in '+err_filename)

示例#2

0

显示文件

    def test_gdocs(self):
        """Convert Google Docs to CNXML and diff each result against a reference.

        Documents come from two sources:

        * every ``.doc`` file in ``test_folder_name + 'doc/'``, uploaded with
          ``upload_doc``;
        * document URLs listed one per line in
          ``./test_files/gdocs/test_files`` (lines starting with ``#`` are
          skipped).  If that file cannot be opened a notice is printed and
          only the uploaded documents are processed.

        Each document is fetched with ``get_gdoc``, converted with
        ``gdocs_to_cnxml``, passed through ``clean_cnxml`` and
        ``validate_cnxml``, and written to ``<name>.tmp``.  The external
        ``diff`` tool then compares ``<name>.cnxml`` with ``<name>.tmp``;
        anything it prints on stdout is saved to ``<name>.diff`` and,
        failing that, anything on stderr is saved to ``<name>.err``.

        The method makes no assertions about the diff itself: differences
        are only reported through ``print`` and the files above.
        """
        gdocs_dir = './test_files/gdocs/'
        doc_dir = test_folder_name + 'doc/'

        # (resource id, base filename) pairs.  Keeping the name next to the
        # id means a failed upload cannot shift the id/filename pairing of
        # the documents that follow it.
        targets = []

        # Probe the optional URL list up front so the notice is printed
        # before any upload starts.
        have_test_file = False
        try:
            fp = open(gdocs_dir + 'test_files')
            fp.close()
            have_test_file = True
        except (IOError, OSError):
            print('No gdocs test file')

        doc_files = [
            f for f in os.listdir(doc_dir)
            if os.path.splitext(f)[1] == '.doc'
        ]
        for d in doc_files:
            just_filename = os.path.splitext(os.path.basename(d))[0]
            try:
                rid = upload_doc(doc_dir + d, 'application/msword',
                                 just_filename)
            except KeyboardInterrupt:
                exit()
            except Exception:
                # Best effort: report the failure and carry on with the rest.
                print('Error uploading ' + just_filename + ' to gdocs')
            else:
                targets.append((rid, just_filename))

        if have_test_file:
            with open(gdocs_dir + 'test_files') as fp:
                for url in fp:
                    if url.startswith('#'):
                        continue
                    match_doc_id = re.match(
                        r'^.*docs\.google\.com/document/d/([^/]+).*$', url)
                    if match_doc_id:
                        doc_id = match_doc_id.group(1)
                        # Listed documents have no local name; use the id.
                        targets.append(('document:' + doc_id, doc_id))

        for rid, filename in targets:
            valid_filename = gdocs_dir + filename + '.cnxml'
            output_filename = gdocs_dir + filename + '.tmp'
            diff_filename = gdocs_dir + filename + '.diff'
            err_filename = gdocs_dir + filename + '.err'

            # rid[9:] drops the 9-character 'document:' prefix.
            gdoc_url = construct_url(rid[9:])
            rid, original_title = get_gdoc(gdoc_url, './test_files/gdocs')
            html_filename = gdocs_dir + rid[9:] + '.htm'
            with open(html_filename, 'r') as html_file:
                html = html_file.read()
            cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
            cnxml = clean_cnxml(cnxml)
            validate_cnxml(cnxml)

            with open(output_filename, 'w') as output:
                output.write(cnxml)
            remove_ids(output_filename)
            os.remove(html_filename)

            # stderr must be piped as well, otherwise communicate() always
            # returns None for it and the error branch below is dead code.
            process = subprocess.Popen(
                ['diff', valid_filename, output_filename],
                shell=False,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            diff_text, err_text = process.communicate()

            # The pipes deliver raw bytes, so the reports are written in
            # binary mode.
            if diff_text:
                with open(diff_filename, 'wb') as diff_output:
                    diff_output.write(diff_text)
                print(
                    'Differences in the testing of gdoc ' + filename +
                    ', information on those differences has been placed in ' +
                    diff_filename)
            elif err_text:
                with open(err_filename, 'wb') as err_output:
                    err_output.write(err_text)
                print(
                    'Error(s) occurred while attempting to test for differences in CNXML output of gdoc '
                    + filename + ', information on these errors are in ' +
                    err_filename)

示例#3

0

显示文件

文件： convert_all.py 项目： oerpub/oerpub.rhaptoslabs.swordpushweb

# Keep only the Word documents.  Slice assignment filters the existing list
# object in place (as the previous remove()-based loop did) in a single pass.
doc_files[:] = [f for f in doc_files if os.path.splitext(f)[1] == ".doc"]

test_folder_name = "./test_files/"

# Upload each document and remember the resource id returned for it.
for d in doc_files:
    just_filename = os.path.basename(d)
    just_filename, extension = os.path.splitext(just_filename)
    try:
        rid = upload_doc(test_folder_name + "doc/" + d, "application/msword", just_filename)
        rids.append(rid)
    except KeyboardInterrupt:
        # Only Ctrl-C is handled here; any other upload error propagates
        # and stops the script (no catch-all handler is installed).
        exit()

# Add the documents listed by URL in the optional test file; lines starting
# with '#' are treated as comments.
if have_test_file:
    with open("./test_files/gdocs/test_files") as fp:
        for url in fp:
            if url.startswith("#"):
                continue
            match_doc_id = re.match(r"^.*docs\.google\.com/document/d/([^/]+).*$", url)
            if match_doc_id:
                rids.append("document:" + match_doc_id.group(1))

示例#4

0

显示文件

文件： convert_all.py 项目： oerpub/oerpub.remix

# Keep only the Word documents.  Slice assignment filters the existing list
# object in place (as the previous remove()-based loop did) in a single pass.
doc_files[:] = [f for f in doc_files if os.path.splitext(f)[1] == '.doc']

test_folder_name = './test_files/'

# Upload each document and remember the resource id returned for it.
for d in doc_files:
    just_filename = os.path.basename(d)
    just_filename, extension = os.path.splitext(just_filename)
    try:
        rid = upload_doc(test_folder_name+'doc/'+d, 'application/msword', just_filename)
        rids.append(rid)
    except KeyboardInterrupt:
        # Only Ctrl-C is handled here; any other upload error propagates
        # and stops the script (no catch-all handler is installed).
        exit()

# Add the documents listed by URL in the optional test file; lines starting
# with '#' are treated as comments.
if have_test_file:
    with open('./test_files/gdocs/test_files') as fp:
        for url in fp:
            if url.startswith('#'):
                continue
            match_doc_id = re.match(r'^.*docs\.google\.com/document/d/([^/]+).*$', url)
            if match_doc_id:
                rids.append('document:'+match_doc_id.group(1))

示例#5

0

显示文件

文件： convert_all_odt.py 项目： oerpub/oerpub.remix

# Collect the Word documents found in doc_folder in a single pass.
# NOTE(review): files are listed from doc_folder but uploaded from
# test_folder_name + 'doc/' below -- confirm both name the same directory.
doc_files = [f for f in os.listdir(doc_folder)
             if os.path.splitext(f)[1] == '.doc']

test_folder_name = './test_files/'

# Upload each document and remember the resource id returned for it.
for d in doc_files:
    just_filename = os.path.basename(d)
    just_filename, extension = os.path.splitext(just_filename)
    try:
        rid = upload_doc(test_folder_name+'doc/'+d, 'application/msword', just_filename)
        rids.append(rid)
    except KeyboardInterrupt:
        # Only Ctrl-C is handled here; any other upload error propagates
        # and stops the script (no catch-all handler is installed).
        exit()

count = 0
for rid in rids:
    print(rid)
    filename = os.path.basename(doc_files[count])
    filename,ext = os.path.splitext(filename)

    valid_filename='./test_files/gdocs/'+filename+'.cnxml'

    gdoc_url = construct_url(rid[9:])