def convert_legacy_folder_to_tkbs_format(src_path, dst_path):
    """Convert a legacy folder to TKBS format on a best-effort basis.

    Creates a ``Document``, loads the legacy data at ``src_path`` and exports
    it to ``dst_path``. Any ``Exception`` raised along the way is reported on
    stdout and swallowed, so the function always returns ``None``.

    Args:
        src_path: Source location handed to ``Document.load_legacy_data``.
            May be a ``str`` or any object with a string representation
            (for example ``pathlib.Path``).
        dst_path: Destination handed to ``Document.export_tkbs_format``.
    """
    try:
        p = Document()
        p.load_legacy_data(src_path)
        p.export_tkbs_format(dst_path)
    except Exception as e:
        # Convert explicitly: concatenating a non-str path (e.g. pathlib.Path)
        # would raise TypeError inside this handler and hide the real error.
        print("ERROR in convert_legacy_folder_to_tkbs_format with src_path "
              + str(src_path))
        print(e)
def make_pxml(res=None, f1=None, f2=None):
    """Export the legacy data under ``paper`` in TKBS format.

    Every entry of the module-level ``factors`` is applied to a fresh
    ``Document`` through ``set_factors``. If ``res`` is given, one more
    ``set_factors(res, f1, f2)`` call follows. The document then loads the
    legacy data from ``paper`` and exports it into the ``config['pxml_dir']``
    sub-folder of ``paper``. ``log(0, "pxml")`` is called first and
    ``log(1, "pxml")`` last.

    Args:
        res: First argument of the extra ``set_factors`` call; the call is
            skipped when this is ``None``.
        f1: Second argument of the extra ``set_factors`` call.
        f2: Third argument of the extra ``set_factors`` call.
    """
    log(0, "pxml")

    document = Document()
    for entry in factors:
        first, second, third = entry[0], entry[1], entry[2]
        document.set_factors(first, second, third)
    if res is not None:
        document.set_factors(res, f1, f2)

    # directory containing TOC.xml
    document.load_legacy_data(paper)
    # Build the target path only after loading, as the original did.
    target_dir = os.path.join(paper, config['pxml_dir'])
    document.export_tkbs_format(target_dir)

    log(1, "pxml")
pass  # no-op carried over from the original snippet

# Verbose switch: progress banners are printed only when this is truthy.
v = True
infolder = r'C:\_test_\in_0105'  # CHANGE THIS
outfolder = r'C:\_test_\out'  # CHANGE THIS

# Step 1: load the legacy data and export it for upload.
if v:
    print("--- CREATING DATA to upload ---")
p = Document()
# Optional scaling override: p.set_factors(150, 1.7238, 0.67)
p.load_legacy_data(infolder)
exportdir = os.path.join(outfolder, "pagexml_for_upload")
prep_dir(exportdir)
p.export_tkbs_format(exportdir)

# Step 2: log in to the Transkribus server.
# NOTE(review): credentials are hard-coded placeholders here - consider
# supplying them from outside the source file.
if v:
    print("--- CONNECTING to server ---")
user = "******"  # CHANGE THIS
key = "<password>"  # CHANGE THIS
collec = "17989"  # CHANGE THIS
tkbs = TranskribusClient(sServerUrl="https://transkribus.eu/TrpServer")
tkbs.auth_login(user, key, True)
# HTRmodelname = 'Test'
HTRmodelid = "10168"  # CHANGE THIS
# dictName = "Hebrew_Test.dict"  # CHANGE THIS
# print("session id: " + tkbs.getSessionId() + "\n=================")

# Step 3: upload the exported pages; the result of upload() is kept in docid.
if v:
    print("--- UPLOADING data to server ---")
docid = upload(
    collec,
    exportdir,
    p.img_names_by_pgnum(),
    p.pxml_names_by_pgnum(),
    p.title,
    user,
    "pipeline test",
    tkbs,
)
def convert_legacy_folder_to_tkbs_format(src_path, dst_path):
    """Load legacy data from ``src_path`` and export it to ``dst_path``.

    A new ``Document`` is created, filled through ``load_legacy_data`` and
    written out through ``export_tkbs_format``. Nothing is caught here, so
    any exception from those calls propagates to the caller.

    Args:
        src_path: Source location passed to ``Document.load_legacy_data``.
        dst_path: Destination passed to ``Document.export_tkbs_format``.
    """
    document = Document()
    document.load_legacy_data(src_path)
    document.export_tkbs_format(dst_path)
v and print("--- CREATING DATA to upload ---") p = Document() #p.set_factors(150, 1.7238, 0.67) p.load_legacy_data(infolder) teifolder = os.path.join(exportfolder, 'tei') teifiles = glob.glob(teifolder + r'\*' + p.doc_title + r'*_tei.xml') if len(teifiles) > 0: v and print("TEI found, Skipping document " + p.doc_title) continue uniquename = p.doc_title + "_" + start firstuploadtopdir = prep_dir( os.path.join(workfolder, r'pagexml_for_upload')) firstexportdir = prep_dir(os.path.join(firstuploadtopdir, uniquename)) p.export_tkbs_format(firstexportdir) v and print("--- UPLOADING data to server ---") docid = upload(collec, firstexportdir, p.img_names_by_pgnum(), p.pxml_names_by_pgnum(), p.title, user, "pipeline test", tkbs) if docid <= 0: print("ERROR - document failed to upload " + p.title) continue v and print("--- DOWNLOADING-1 doc for page ids ---") tempdowndir = prep_dir(os.path.join(workfolder, "tempdowndir")) target_dir = os.path.join( tempdowndir, p.title + "_" + str(collec) + "_" + str(docid)) docjson = download(collec, str(docid), target_dir, tkbs, p.tkbs_meta_filename)