def test_gdocs(self):
    """Convert each available Google Doc to CNXML and diff it against the stored reference.

    Documents come from two sources:

    * every ``.doc`` file in ``test_folder_name + 'doc/'`` is uploaded with
      ``upload_doc``;
    * every Google Docs document URL listed in
      ``./test_files/gdocs/test_files`` contributes its document id (lines
      starting with ``#`` are skipped).

    For each document the HTML obtained through ``get_gdoc`` is passed
    through ``gdocs_to_cnxml``, ``clean_cnxml`` and ``validate_cnxml``.  The
    result is written to ``<name>.tmp``, handed to ``remove_ids`` and then
    compared with ``<name>.cnxml`` using the external ``diff`` command.
    Differences are saved to ``<name>.diff`` and diff errors to
    ``<name>.err``; both cases are reported with ``print``.  The method
    asserts nothing and returns nothing.
    """
    gdocs_dir = './test_files/gdocs'
    doc_dir = test_folder_name + 'doc/'

    # Collect the ids listed in the optional URL file first so that the
    # "missing file" notice is still printed before any upload starts.
    url_rids = []
    try:
        with open(gdocs_dir + '/test_files') as url_file:
            for line in url_file:
                if line.startswith('#'):
                    continue
                match_doc_id = re.match(
                    r'^.*docs\.google\.com/document/d/([^/]+).*$', line)
                if match_doc_id:
                    url_rids.append('document:' + match_doc_id.group(1))
    except (IOError, OSError):
        print('No gdocs test file')

    # Each job pairs a resource id with the base name of its output files.
    # Keeping the pair together means a failed upload can no longer shift
    # later ids onto the wrong reference file.
    jobs = []
    doc_files = [name for name in os.listdir(doc_dir)
                 if os.path.splitext(name)[1] == '.doc']
    for doc_file in doc_files:
        base_name = os.path.splitext(os.path.basename(doc_file))[0]
        try:
            uploaded_rid = upload_doc(
                doc_dir + doc_file, 'application/msword', base_name)
        except KeyboardInterrupt:
            exit()
        except Exception:
            print('Error uploading ' + base_name + ' to gdocs')
        else:
            jobs.append((uploaded_rid, base_name))

    # Documents named in the URL file use their document id as base name;
    # [9:] drops the nine-character 'document:' prefix added above.
    for listed_rid in url_rids:
        jobs.append((listed_rid, listed_rid[9:]))

    for source_rid, filename in jobs:
        valid_filename = gdocs_dir + '/' + filename + '.cnxml'
        output_filename = gdocs_dir + '/' + filename + '.tmp'
        diff_filename = gdocs_dir + '/' + filename + '.diff'
        err_filename = gdocs_dir + '/' + filename + '.err'

        gdoc_url = construct_url(source_rid[9:])
        rid, _ = get_gdoc(gdoc_url, gdocs_dir)
        html_filename = gdocs_dir + '/' + rid[9:] + '.htm'
        with open(html_filename, 'r') as html_file:
            html = html_file.read()

        cnxml, _ = gdocs_to_cnxml(html, bDownloadImages=True)
        cnxml = clean_cnxml(cnxml)
        validate_cnxml(cnxml)
        with open(output_filename, 'w') as output:
            output.write(cnxml)
        remove_ids(output_filename)
        os.remove(html_filename)

        # stderr must be piped as well, otherwise communicate() always
        # returns None for it and diff failures are never recorded.
        process = subprocess.Popen(
            ['diff', valid_filename, output_filename],
            shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        diff_text, diff_errors = process.communicate()

        # The pipes deliver raw bytes, so the reports are written in binary
        # mode; this works whether the interpreter treats them as str or
        # bytes.
        if diff_text:
            with open(diff_filename, 'wb') as diff_output:
                diff_output.write(diff_text)
            print('Differences in the testing of gdoc '+filename+', information on those differences has been placed in '+diff_filename)
        elif diff_errors:
            with open(err_filename, 'wb') as err_output:
                err_output.write(diff_errors)
            print('Error(s) occurred while attempting to test for differences in CNXML output of gdoc '+filename+', information on these errors are in '+err_filename)
from oerpub.rhaptoslabs.html_gdocs2cnxml.gdocs2cnxml import gdocs_to_cnxml
from utils import clean_cnxml, escape_system
from test_conversion import validate_cnxml, remove_ids

# Regenerate the reference file 'valid.cnxml' from one hard-coded Google Doc:
# get the document's HTML through get_gdoc, convert it with gdocs_to_cnxml,
# run clean_cnxml and validate_cnxml, and write the result to disk.

url = 'https://docs.google.com/document/d/1tiZR1fhBl3ZQ_UaQ5sRDA3gSs_7LjgtTITkBAGjuTpI/edit'
# Alternative source document:
#url='https://docs.google.com/document/d/1Gw9j1J-_d5YQoq6SIc3Az2hiVlwtvVcJkXfYKDR_zBM/edit'

# Build the 'document:<id>' resource id from the URL.
match_doc_id = re.match(r'^.*docs\.google\.com/document/d/([^/]+).*$', url)
rid = 'document:' + match_doc_id.group(1)
print(rid)

# [9:] drops the nine-character 'document:' prefix, leaving the bare id.
filename = rid[9:]
valid_filename = 'valid.cnxml'
gdoc_url = construct_url(rid[9:])
print(gdoc_url)

# NOTE(review): get_gdoc receives the original edit URL here, not the
# gdoc_url built above -- confirm that this is intended.
rid, original_title = get_gdoc(url, './')
html_filename = './' + rid[9:] + '.htm'
with open(html_filename, 'r') as html_file:
    html = html_file.read()

cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
cnxml = clean_cnxml(cnxml)
validate_cnxml(cnxml)

# The context manager closes the file even if the write fails.
with open(valid_filename, 'w') as output:
    output.write(cnxml)
def test_gdocs(self):
    """Convert each available Google Doc to CNXML and diff it against the stored reference.

    Documents come from two sources:

    * every ``.doc`` file in ``test_folder_name + 'doc/'`` is uploaded with
      ``upload_doc``;
    * every Google Docs document URL listed in
      ``./test_files/gdocs/test_files`` contributes its document id (lines
      starting with ``#`` are skipped).

    For each document the HTML obtained through ``get_gdoc`` is passed
    through ``gdocs_to_cnxml``, ``clean_cnxml`` and ``validate_cnxml``.  The
    result is written to ``<name>.tmp``, handed to ``remove_ids`` and then
    compared with ``<name>.cnxml`` using the external ``diff`` command.
    Differences are saved to ``<name>.diff`` and diff errors to
    ``<name>.err``; both cases are reported with ``print``.  The method
    asserts nothing and returns nothing.
    """
    gdocs_dir = './test_files/gdocs'
    doc_dir = test_folder_name + 'doc/'

    # Collect the ids listed in the optional URL file first so that the
    # "missing file" notice is still printed before any upload starts.
    url_rids = []
    try:
        with open(gdocs_dir + '/test_files') as url_file:
            for line in url_file:
                if line.startswith('#'):
                    continue
                match_doc_id = re.match(
                    r'^.*docs\.google\.com/document/d/([^/]+).*$', line)
                if match_doc_id:
                    url_rids.append('document:' + match_doc_id.group(1))
    except (IOError, OSError):
        print('No gdocs test file')

    # Each job pairs a resource id with the base name of its output files.
    # Keeping the pair together means a failed upload can no longer shift
    # later ids onto the wrong reference file.
    jobs = []
    doc_files = [name for name in os.listdir(doc_dir)
                 if os.path.splitext(name)[1] == '.doc']
    for doc_file in doc_files:
        base_name = os.path.splitext(os.path.basename(doc_file))[0]
        try:
            uploaded_rid = upload_doc(
                doc_dir + doc_file, 'application/msword', base_name)
        except KeyboardInterrupt:
            exit()
        except Exception:
            print('Error uploading ' + base_name + ' to gdocs')
        else:
            jobs.append((uploaded_rid, base_name))

    # Documents named in the URL file use their document id as base name;
    # [9:] drops the nine-character 'document:' prefix added above.
    for listed_rid in url_rids:
        jobs.append((listed_rid, listed_rid[9:]))

    for source_rid, filename in jobs:
        valid_filename = gdocs_dir + '/' + filename + '.cnxml'
        output_filename = gdocs_dir + '/' + filename + '.tmp'
        diff_filename = gdocs_dir + '/' + filename + '.diff'
        err_filename = gdocs_dir + '/' + filename + '.err'

        gdoc_url = construct_url(source_rid[9:])
        rid, _ = get_gdoc(gdoc_url, gdocs_dir)
        html_filename = gdocs_dir + '/' + rid[9:] + '.htm'
        with open(html_filename, 'r') as html_file:
            html = html_file.read()

        cnxml, _ = gdocs_to_cnxml(html, bDownloadImages=True)
        cnxml = clean_cnxml(cnxml)
        validate_cnxml(cnxml)
        with open(output_filename, 'w') as output:
            output.write(cnxml)
        remove_ids(output_filename)
        os.remove(html_filename)

        # stderr must be piped as well, otherwise communicate() always
        # returns None for it and diff failures are never recorded.
        process = subprocess.Popen(
            ['diff', valid_filename, output_filename],
            shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        diff_text, diff_errors = process.communicate()

        # The pipes deliver raw bytes, so the reports are written in binary
        # mode; this works whether the interpreter treats them as str or
        # bytes.
        if diff_text:
            with open(diff_filename, 'wb') as diff_output:
                diff_output.write(diff_text)
            print('Differences in the testing of gdoc ' + filename +
                  ', information on those differences has been placed in ' +
                  diff_filename)
        elif diff_errors:
            with open(err_filename, 'wb') as err_output:
                err_output.write(diff_errors)
            print('Error(s) occurred while attempting to test for differences in CNXML output of gdoc ' +
                  filename + ', information on these errors are in ' +
                  err_filename)
# Excerpt from the middle of a longer routine; the statements that create
# `rids`, `doc_files`, `fp` and `match_doc_id` are not part of this excerpt.
# It appends a 'document:<id>' resource id to `rids`, closes `fp`, and then
# for each id in `rids`: prints it, picks an output base name (the base name
# of doc_files[count] while `count` is within range, otherwise the id with
# its first nine characters removed), calls get_gdoc with the URL built by
# construct_url, reads ./test_files/gdocs/<id>.htm, converts that HTML with
# gdocs_to_cnxml, runs clean_cnxml and validate_cnxml, and writes the CNXML
# to ./test_files/gdocs/<name>.cnxml.
# NOTE(review): presumably get_gdoc is what creates the .htm file that is
# read afterwards -- confirm against its implementation.
# NOTE(review): `count` is never incremented inside this excerpt, so every id
# would map to doc_files[0] whenever doc_files is non-empty -- confirm
# whether an increment was lost.
# NOTE(review): html_file.flush() is called on a handle opened for reading;
# it looks unnecessary -- confirm.
rids.append('document:'+match_doc_id.group(1)) fp.close() count = 0 for rid in rids: print(rid) if(count < len(doc_files)): filename = os.path.basename(doc_files[count]) filename,ext = os.path.splitext(filename) else: filename = rid[9:] valid_filename='./test_files/gdocs/'+filename+'.cnxml' gdoc_url = construct_url(rid[9:]) rid,original_title = get_gdoc(gdoc_url, './test_files/gdocs') html_filename = './test_files/gdocs/'+rid[9:]+'.htm' html_file = open(html_filename, 'r') try: html = html_file.read() html_file.flush() finally: html_file.close() cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True) cnxml = clean_cnxml(cnxml) validate_cnxml(cnxml) output=open(valid_filename,'w') output.write(cnxml) output.close()
# Excerpt from the middle of a longer routine; the statements that create
# `rids`, `doc_files`, `fp` and `match_doc_id` are not part of this excerpt.
# It appends a "document:<id>" resource id to `rids`, closes `fp`, and then
# for each id in `rids`: prints it, picks an output base name (the base name
# of doc_files[count] while `count` is within range, otherwise the id with
# its first nine characters removed), calls get_gdoc with the URL built by
# construct_url, reads ./test_files/gdocs/<id>.htm, converts that HTML with
# gdocs_to_cnxml, runs clean_cnxml and validate_cnxml, and writes the CNXML
# to ./test_files/gdocs/<name>.cnxml.
# NOTE(review): presumably get_gdoc is what creates the .htm file that is
# read afterwards -- confirm against its implementation.
# NOTE(review): `count` is never incremented inside this excerpt, so every id
# would map to doc_files[0] whenever doc_files is non-empty -- confirm
# whether an increment was lost.
# NOTE(review): html_file.flush() is called on a handle opened for reading;
# it looks unnecessary -- confirm.
rids.append("document:" + match_doc_id.group(1)) fp.close() count = 0 for rid in rids: print(rid) if count < len(doc_files): filename = os.path.basename(doc_files[count]) filename, ext = os.path.splitext(filename) else: filename = rid[9:] valid_filename = "./test_files/gdocs/" + filename + ".cnxml" gdoc_url = construct_url(rid[9:]) rid, original_title = get_gdoc(gdoc_url, "./test_files/gdocs") html_filename = "./test_files/gdocs/" + rid[9:] + ".htm" html_file = open(html_filename, "r") try: html = html_file.read() html_file.flush() finally: html_file.close() cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True) cnxml = clean_cnxml(cnxml) validate_cnxml(cnxml) output = open(valid_filename, "w") output.write(cnxml) output.close()
from oerpub.rhaptoslabs.html_gdocs2cnxml.gdocs2cnxml import gdocs_to_cnxml
from utils import clean_cnxml, escape_system
from test_conversion import validate_cnxml, remove_ids

# Regenerate the reference file 'valid.cnxml' from one hard-coded Google Doc:
# get the document's HTML through get_gdoc, convert it with gdocs_to_cnxml,
# run clean_cnxml and validate_cnxml, and write the result to disk.

url='https://docs.google.com/document/d/1tiZR1fhBl3ZQ_UaQ5sRDA3gSs_7LjgtTITkBAGjuTpI/edit'
# Alternative source document:
#url='https://docs.google.com/document/d/1Gw9j1J-_d5YQoq6SIc3Az2hiVlwtvVcJkXfYKDR_zBM/edit'

# Build the 'document:<id>' resource id from the URL.
match_doc_id = re.match(r'^.*docs\.google\.com/document/d/([^/]+).*$', url)
rid = 'document:' + match_doc_id.group(1)
print(rid)

# [9:] drops the nine-character 'document:' prefix, leaving the bare id.
filename = rid[9:]
valid_filename = 'valid.cnxml'
gdoc_url = construct_url(rid[9:])
print(gdoc_url)

# NOTE(review): get_gdoc receives the original edit URL here, not the
# gdoc_url built above -- confirm that this is intended.
rid, original_title = get_gdoc(url, './')
html_filename = './' + rid[9:] + '.htm'
with open(html_filename, 'r') as html_file:
    html = html_file.read()

cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
cnxml = clean_cnxml(cnxml)
validate_cnxml(cnxml)

# The context manager closes the file even if the write fails.
with open(valid_filename, 'w') as output:
    output.write(cnxml)