Exemplo n.º 1
0
def convert_legacy_folder_to_tkbs_format(src_path, dst_path):
    """Load the legacy data at ``src_path`` and export it to ``dst_path``.

    A new ``Document`` is filled through ``load_legacy_data`` and written out
    through ``export_tkbs_format``. The conversion is best-effort: any
    ``Exception`` raised along the way is reported on stdout instead of
    propagating to the caller.

    Args:
        src_path: Location passed to ``Document.load_legacy_data``.
        dst_path: Location passed to ``Document.export_tkbs_format``.
    """
    try:
        p = Document()
        p.load_legacy_data(src_path)
        p.export_tkbs_format(dst_path)
    except Exception as e:
        # Convert explicitly: concatenating a non-str path (e.g. pathlib.Path)
        # raised TypeError here and hid the error being reported.
        print("ERROR in convert_legacy_folder_to_tkbs_format with src_path " +
              str(src_path))
        print(e)
Exemplo n.º 2
0
def make_pxml(res=None, f1=None, f2=None):
    """Load the legacy data in ``paper`` and export it under ``paper``.

    Each entry of the module-level ``factors`` is applied to a new
    ``Document`` via ``set_factors``. When ``res`` is not ``None``, one more
    ``set_factors(res, f1, f2)`` call follows. The document is then loaded
    from ``paper`` and exported to the ``config['pxml_dir']`` subdirectory of
    ``paper``. ``log`` is called with 0 before the work and 1 after it.
    """
    log(0, "pxml")
    document = Document()
    for entry in factors:
        first, second, third = entry[0], entry[1], entry[2]
        document.set_factors(first, second, third)
    if res is not None:
        document.set_factors(res, f1, f2)
    # `paper` is the directory containing TOC.xml
    document.load_legacy_data(paper)
    target_dir = os.path.join(paper, config['pxml_dir'])
    document.export_tkbs_format(target_dir)
    log(1, "pxml")
Exemplo n.º 3
0
        pass


# Verbose flag: progress messages are printed only while this is truthy.
v = True

infolder = r'C:\_test_\in_0105'  #CHANGE THIS
outfolder = r'C:\_test_\out'  #CHANGE THIS

# Step 1: load the legacy input folder and export it for upload.
if v:
    print("---   CREATING DATA to upload  ---")
p = Document()
#p.set_factors(150, 1.7238, 0.67)
p.load_legacy_data(infolder)

exportdir = os.path.join(outfolder, "pagexml_for_upload")
prep_dir(exportdir)
p.export_tkbs_format(exportdir)

# Step 2: log in to the Transkribus server.
if v:
    print("---   CONNECTING to server    ---")
user = "******"  #CHANGE THIS
key = "<password>"  #CHANGE THIS
collec = "17989"  #CHANGE THIS
tkbs = TranskribusClient(sServerUrl="https://transkribus.eu/TrpServer")
tkbs.auth_login(user, key, True)
#HTRmodelname = 'Test'
HTRmodelid = "10168"  #CHANGE THIS
#dictName =  "Hebrew_Test.dict" #CHANGE THIS
#print("session id: " + tkbs.getSessionId() + "\n=================")

# Step 3: upload the exported files; the result of upload() is kept in docid.
if v:
    print("---   UPLOADING data to server       ---")
docid = upload(
    collec,
    exportdir,
    p.img_names_by_pgnum(),
    p.pxml_names_by_pgnum(),
    p.title,
    user,
    "pipeline test",
    tkbs,
)
def convert_legacy_folder_to_tkbs_format(src_path, dst_path):
    """Load legacy data from ``src_path`` and export it to ``dst_path``.

    A new ``Document`` is filled through ``load_legacy_data`` and written out
    through ``export_tkbs_format``. Nothing is caught here, so any exception
    raised by those calls reaches the caller.
    """
    document = Document()
    document.load_legacy_data(src_path)
    document.export_tkbs_format(dst_path)
Exemplo n.º 5
0
        v and print("---   CREATING DATA to upload  ---")
        p = Document()
        #p.set_factors(150, 1.7238, 0.67)
        p.load_legacy_data(infolder)

        teifolder = os.path.join(exportfolder, 'tei')
        teifiles = glob.glob(teifolder + r'\*' + p.doc_title + r'*_tei.xml')
        if len(teifiles) > 0:
            v and print("TEI found, Skipping document " + p.doc_title)
            continue

        uniquename = p.doc_title + "_" + start
        firstuploadtopdir = prep_dir(
            os.path.join(workfolder, r'pagexml_for_upload'))
        firstexportdir = prep_dir(os.path.join(firstuploadtopdir, uniquename))
        p.export_tkbs_format(firstexportdir)

        v and print("---   UPLOADING data to server       ---")
        docid = upload(collec, firstexportdir, p.img_names_by_pgnum(),
                       p.pxml_names_by_pgnum(), p.title, user, "pipeline test",
                       tkbs)
        if docid <= 0:
            print("ERROR - document failed to upload " + p.title)
            continue

        v and print("---   DOWNLOADING-1 doc for page ids       ---")
        tempdowndir = prep_dir(os.path.join(workfolder, "tempdowndir"))
        target_dir = os.path.join(
            tempdowndir, p.title + "_" + str(collec) + "_" + str(docid))
        docjson = download(collec, str(docid), target_dir, tkbs,
                           p.tkbs_meta_filename)