def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
    TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl,
                               proxies=sHttpProxy, loggingLevel=loggingLevel)
def __init__(self, trnkbsServerUrl, sHttpProxy=None, loggingLevel=logging.WARN):
    TranskribusClient.__init__(self, sServerUrl=self.sDefaultServerUrl,
                               proxies=sHttpProxy, loggingLevel=loggingLevel)
    self._trpMng = DoTranscript(self.sDefaultServerUrl, sHttpProxy=sHttpProxy,
                                loggingLevel=loggingLevel)
    self.percTest = 0.1
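# --------------------------------------------------------------------------
# A minimal sketch of how such a command object is typically driven (it mirrors
# the applyLA_URO() code further below). The collection id "12345" and the
# "docid/page" string are illustrative placeholders, and the default server URL
# is an assumption; the calls themselves all appear elsewhere in this file.
import logging
from TranskribusPyClient.client import TranskribusClient, getStoredCredentials

login, pwd = getStoredCredentials()
conn = TranskribusClient(sServerUrl="https://transkribus.eu/TrpServer",  # assumed default
                         proxies={}, loggingLevel=logging.WARN)
conn.auth_login(login, pwd)

LA = DoLAbatch(conn.getServerUrl(), conn.getProxies())
LA.setSessionId(conn.getSessionId())
LA._trpMng.setSessionId(conn.getSessionId())
_, sPageDesc = LA.buildDescription("12345", "67890/1")          # colid, "docid/page"
_, lJobIDs = LA.run("12345", LA.jsonToXMLDescription(sPageDesc),
                    "CITlabAdvancedLaJob", False)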
import sys, os
import logging

try:  # to ease the use without proper Python installation
    import TranskribusPyClient_version
except ImportError:
    sys.path.append(os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.abspath(sys.argv[0])))))
    import TranskribusPyClient_version

from TranskribusPyClient.test import _colId_A, _docId_b
from TranskribusPyClient.client import TranskribusClient, getStoredCredentials

login, pwd = getStoredCredentials()

# the proxy/credential arguments are redacted and garbled in the source snippet:
# conn = TranskribusClient(proxies={'https': 'http://*****:*****@xrce.xerox.com", "trnjluc", sHttpsProxyUrl='http://cornillon:8000')
conn = TranskribusClient(proxies={'https': 'http://cornillon:8000'},
                         loggingLevel=logging.INFO)
sessionID = conn.auth_login(login, pwd)

# a PAGE transcript sample (truncated in the source snippet)
sXml = u"""<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15/pagecontent.xsd">
    <Metadata>
        <Creator>TRP</Creator>
        <Created>2016-08-18T13:35:08.767+07:00</Created>
        <LastChange>2016-12-01T09:59:24.254+01:00</LastChange>
"""
class TableProcessing(Component.Component):
    usage = ""
    version = "v.01"
    description = "description: table layout analysis based on template"

    sCOL = "col"
    sMPXMLExtension = ".mpxml"

    def __init__(self):
        """
        Always call the Component constructor first.
        """
        Component.Component.__init__(self, "TableProcessing", self.usage,
                                     self.version, self.description)

        self.colid = None
        self.docid = None
        self.bFullCol = False
        # generate MPXML using Ext
        self.useExtForMPXML = False
        self.bRegenerateMPXML = False
        self.bUROCVLMerge = False

        self.sRowModelName = None
        self.sRowModelDir = None

        self.sHTRmodel = None
        self.sDictName = None

    def setParams(self, dParams):
        """
        Always call the Component setParams first.
        Here, we set our internal attributes according to a possibly specified value
        (otherwise they keep their default value).
        """
        Component.Component.setParams(self, dParams)
        if "coldir" in dParams:
            self.coldir = dParams["coldir"]
        if "colid" in dParams:
            self.colid = dParams["colid"]
        if "docid" in dParams:
            self.docid = dParams["docid"]
        if "useExt" in dParams:
            self.useExtForMPXML = dParams["useExt"]
        if "mergeTLC" in dParams:
            self.bUROCVLMerge = dParams["mergeTLC"]
        if "regMPXML" in dParams:
            self.bRegenerateMPXML = dParams["regMPXML"]
        if "rowmodelname" in dParams:
            self.sRowModelName = dParams["rowmodelname"]
        if "rowmodeldir" in dParams:
            self.sRowModelDir = dParams["rowmodeldir"]
        if "htrmodel" in dParams:
            self.sHTRmodel = dParams["htrmodel"]
        if "dictname" in dParams:
            self.sDictName = dParams["dictname"]

        # Connection to Transkribus
        self.myTrKCient = None
        self.persist = False
        self.loginInfo = False
        if "server" in dParams:
            self.server = dParams["server"]
        if "persist" in dParams:
            self.persist = dParams["persist"]
        if "login" in dParams:
            self.loginInfo = dParams["login"]

    def login(self, trnskrbs_client, trace=None, traceln=None):
        """
        Deal with the various login variants...
        - trace and traceln are optional print methods.
        Return True or raise an exception.
        """
        DEBUG = True
        bOk = False
        if self.persist:
            # try getting some persistent session token
            if DEBUG and trace:
                trace(" ---login--- Try reusing persistent session ... ")
            try:
                bOk = trnskrbs_client.reusePersistentSession()
                if DEBUG and traceln:
                    traceln("OK!")
            except:
                if DEBUG and traceln:
                    traceln("Failed")

        if not bOk:
            if self.loginInfo:
                # NOTE: self.pwd must have been set by the caller in this case
                login, pwd = self.loginInfo, self.pwd
            else:
                if DEBUG and trace:
                    trace(" ---login--- no login provided, looking for stored credentials... ")
                login, pwd = trnskrbs_client.getStoredCredentials(bAsk=False)
                if DEBUG and traceln:
                    traceln("OK")

            if DEBUG and trace:
                trace(" ---login--- logging onto Transkribus as %s " % login)
            trnskrbs_client.auth_login(login, pwd)
            if DEBUG and traceln:
                traceln("OK")
            bOk = True

        return bOk

    def downloadCollection(self, colid, destDir, docid, bNoImg=True, bForce=False):
        """
        Download collection colid.
        NOTE: destDir is currently forced to '.'
        """
        destDir = "."
        # options.server, proxies, loggingLevel=logging.WARN)
        # download
        downloader = TranskribusDownloader(self.myTrKCient.getServerUrl(),
                                           self.myTrKCient.getProxies())
        downloader.setSessionId(self.myTrKCient.getSessionId())
        traceln("- Downloading collection %s to folder %s" % (colid, os.path.abspath(destDir)))
        # col_ts, colDir = downloader.downloadCollection(colid, destDir, bForce=options.bForce, bNoImage=options.bNoImage)
        col_ts, colDir, ldocids, dFileListPerDoc = downloader.downloadCollection(
            colid, destDir, bForce=bForce, bNoImage=bNoImg, sDocId=docid)
        traceln("- Done")
        with open(os.path.join(colDir, "config.txt"), "w") as fd:
            fd.write("server=%s\nforce=%s\nstrict=%s\n" % (self.server, True, False))
        downloader.generateCollectionMultiPageXml(
            os.path.join(colDir, TableProcessing.sCOL), dFileListPerDoc, False)
        traceln('- Done, see in %s' % colDir)
        return ldocids

    def upLoadDocument(self, colid, coldir, docid, sNote="", sTranscripExt='.mpxml'):
        """
        Upload the transcripts of document docid into collection colid.
        """
        # uploader = TranskribusTranscriptUploader(self.server, self.proxies)
        uploader = TranskribusDUTranscriptUploader(self.myTrKCient.getServerUrl(),
                                                   self.myTrKCient.getProxies())
        uploader.setSessionId(self.myTrKCient.getSessionId())
        traceln("- uploading document %s to collection %s" % (docid, colid))
        uploader.uploadDocumentTranscript(colid, docid,
                                          os.path.join(coldir, TableProcessing.sCOL),
                                          sNote, 'NLE Table', sTranscripExt, iVerbose=False)
        traceln("- Done")
        return

    def applyLA_URO(self, colid, docid, nbpages):
        """
        Apply the textline finder, page by page.
        """
        # do the job...
        # if options.trp_doc:
        #     trpdoc = json.load(codecs.open(options.trp_doc, "rb", 'utf-8'))
        #     docId, sPageDesc = doer.buildDescription(colId, options.docid, trpdoc)
        traceln('process %s pages...' % nbpages)
        lretJobIDs = []
        for i in range(1, nbpages + 1):
            LA = DoLAbatch(self.myTrKCient.getServerUrl(), self.myTrKCient.getProxies())
            LA._trpMng.setSessionId(self.myTrKCient.getSessionId())
            LA.setSessionId(self.myTrKCient.getSessionId())
            _, sPageDesc = LA.buildDescription(colid, "%s/%s" % (docid, i))
            sPageDesc = LA.jsonToXMLDescription(sPageDesc)
            _, lJobIDs = LA.run(colid, sPageDesc, "CITlabAdvancedLaJob", False)
            traceln(lJobIDs)
            lretJobIDs.extend(lJobIDs)
            traceln("- LA running for page %d job:%s" % (i, lJobIDs))
        return lretJobIDs

    def applyHTRForRegions(self, colid, docid, nbpages, modelname, dictionary):
        """
        Apply an HTR model at region level.
        """
        htrComp = DoHtrRnn(self.myTrKCient.getServerUrl(), self.myTrKCient.getProxies())
        htrComp._trpMng.setSessionId(self.myTrKCient.getSessionId())
        htrComp.setSessionId(self.myTrKCient.getSessionId())
        _, sPageDesc = htrComp.buildDescription(colid, "%s/%s" % (docid, nbpages))
        sPages = "1-%d" % (nbpages)

        sModelID = None
        # get modelID
        lColModels = self.myTrKCient.listRnns(colid)
        for model in lColModels:
            # print(model['htrId'], type(model['htrId']), modelname, type(modelname))
            if str(model['htrId']) == str(modelname):
                sModelID = model['htrId']
                traceln('model id = %s' % sModelID)
                # some old models do not have a 'params' field
                # try:
                #     traceln("%s\t%s\t%s" % (model['htrId'], model['name'], model['params']))
                # except KeyError:
                #     traceln("%s\t%s\tno params" % (model['htrId'], model['name']))
        if sModelID is None:
            raise Exception("no model ID found for %s" % (modelname))
        ret = htrComp.htrRnnDecode(colid, sModelID, dictionary, docid, sPageDesc,
                                   bDictTemp=False)
        traceln(ret)
        return ret

    def applyHTR(self, colid, docid, nbpages, modelname, dictionary):
        """
        Apply HTR on docid.
        The HTR id is needed: we have the HTR model name.
        """
        htrComp = DoHtrRnn(self.myTrKCient.getServerUrl(), self.myTrKCient.getProxies())
        htrComp._trpMng.setSessionId(self.myTrKCient.getSessionId())
        htrComp.setSessionId(self.myTrKCient.getSessionId())
        _, sPageDesc = htrComp.buildDescription(colid, "%s/%s" % (docid, nbpages))
        sPages = "1-%d" % (nbpages)

        sModelID = None
        # get modelID
        lColModels = self.myTrKCient.listRnns(colid)
        for model in lColModels:
            # print(model['htrId'], type(model['htrId']), modelname, type(modelname))
            if str(model['htrId']) == str(modelname):
                sModelID = model['htrId']
                traceln('model id = %s' % sModelID)
                # some old models do not have a 'params' field
                # try:
                #     traceln("%s\t%s\t%s" % (model['htrId'], model['name'], model['params']))
                # except KeyError:
                #     traceln("%s\t%s\tno params" % (model['htrId'], model['name']))
        if sModelID is None:
            raise Exception("no model ID found for %s" % (modelname))
        ret = htrComp.htrRnnDecode(colid, sModelID, dictionary, docid, sPageDesc,
                                   bDictTemp=False)
        traceln(ret)
        return ret

    def overlapX(self, zoneA, zoneB):
        """True if the bounding boxes of zoneA and zoneB overlap on the X axis."""
        [x11, y11, x12, y12] = zoneA.getBoundingBox()  # self.getX(), self.getY(), self.getHeight(), self.getWidth()
        [x21, y21, x22, y22] = zoneB.getBoundingBox()
        [a1, a2] = x11, x12
        [b1, b2] = x21, x22  # zoneB.getX(), zoneB.getX() + zoneB.getWidth()
        return min(a2, b2) >= max(a1, b1)

    def overlapY(self, zoneA, zoneB):
        """True if the bounding boxes of zoneA and zoneB overlap on the Y axis."""
        [x11, y11, x12, y12] = zoneA.getBoundingBox()
        [x21, y21, x22, y22] = zoneB.getBoundingBox()
        [a1, a2] = y11, y12
        [b1, b2] = y21, y22  # zone.getY(), zone.getY() + zone.getHeight()
        return min(a2, b2) >= max(a1, b1)

    def signedOverlap(self, zoneA, zoneB):
        """
        Overlap of zoneA and zoneB:
        return the ratio of the intersection surface to the surface of zoneA.
        """
        [x11, y11, x12, y12] = zoneA.getBoundingBox()
        [x21, y21, x22, y22] = zoneB.getBoundingBox()  # zone.getX(), zone.getY(), zone.getHeight(), zone.getWidth()
        w1 = x12 - x11
        h1 = y12 - y11

        fOverlap = 0.0
        if self.overlapX(zoneA, zoneB) and self.overlapY(zoneA, zoneB):
            s1 = w1 * h1
            # possible?
            if s1 == 0:
                s1 = 1.0
            # intersection
            nx1 = max(x11, x21)
            nx2 = min(x12, x22)
            ny1 = max(y11, y21)
            ny2 = min(y12, y22)
            h = abs(nx2 - nx1)
            w = abs(ny2 - ny1)
            inter = h * w
            if inter > 0:
                fOverlap = inter / s1
            else:
                # if overlapX and overlapY hold, this should not be possible!
                fOverlap = 0.0

        return fOverlap

    def mergeBaselineCells(self, coldir, colid, docid):
        """
        Take a file (pxml) with the material processed on Transkribus,
        take the CVL template tool xml (xml),
        merge them and regenerate a mpxml.
        """
        xmlpath = os.path.abspath(os.path.join(coldir, TableProcessing.sCOL, docid))
        # print(xmlpath)
        mpxml = xmlpath + ".mpxml"
        mpxmldoc = etree.parse(mpxml)

        lxml = glob.glob(os.path.join(xmlpath, "*.xml"))
        pxmldoc = MultiPageXml.makeMultiPageXml(lxml)

        lhtrxml = glob.glob(os.path.join(xmlpath, "*.pxml"))
        mpxmldoc = MultiPageXml.makeMultiPageXml(lhtrxml)

        lPXMLPage = PageXml.getChildByName(mpxmldoc.getroot(), 'Page')
        lXMLPage = PageXml.getChildByName(pxmldoc.getroot(), 'Page')
        assert len(lXMLPage) == len(lPXMLPage)

        for i, cvlpage in enumerate(lXMLPage):
            ## remove TextRegion from the cvl page
            lTextRegions = PageXml.getChildByName(cvlpage, 'TextRegion')
            for tr in lTextRegions:
                tr.getparent().remove(tr)

            pxmlpage = lPXMLPage[i]
            lTL = []
            lTextRegions = PageXml.getChildByName(pxmlpage, 'TextRegion')
            for x in lTextRegions:
                lTL.extend(PageXml.getChildByName(x, 'TextLine'))

            ltable = PageXml.getChildByName(cvlpage, 'TableRegion')
            if len(ltable) == 0:
                raise ValueError("NO TABLE")
            lCells = PageXml.getChildByName(ltable[0], 'TableCell')
            lC = [Polygon(PageXml.getPointList(c)) for c in lCells]
            lT = [Polygon(PageXml.getPointList(t)) for t in lTL]

            for j, tl in enumerate(lT):
                ## normalization of the textline polygon from its baseline
                lCoordsPoints = PageXml.getChildByName(lTL[j], 'Coords')
                lCoordsB = PageXml.getChildByName(lTL[j], 'Baseline')
                coordB = lCoordsB[0]
                coord = lCoordsPoints[0]
                iHeight = 30  # in pixel
                x1, y1, x2, y2 = Polygon(PageXml.getPointList(coordB)).getBoundingBox()
                if coord is not None:
                    coord.set('points', "%d,%d %d,%d %d,%d %d,%d"
                              % (x1, y1 - iHeight, x2, y1 - iHeight, x2, y2, x1, y2))
                tl = Polygon(PageXml.getPointList(coordB))

                lOverlap = []
                for _, c in enumerate(lC):
                    # print(lCells[j].get('row'), lCells[j].get('col'), self.signedOverlap(c, tl), tl.getBoundingBox(), c.getBoundingBox())
                    lOverlap.append(self.signedOverlap(c, tl))

                ## region of the same size as the textline
                # print(j, max(lOverlap), lOverlap.index(max(lOverlap)))
                if max(lOverlap) == 0:
                    region = PageXml.createPageXmlNode('TextRegion')
                    cvlpage.append(region)
                    region.append(lTL[j])
                else:
                    cell = lCells[lOverlap.index(max(lOverlap))]
                    cell.append(lTL[j])
                    # print(cell.get('row'), cell.get('col'), ''.join(lTL[j].itertext()))

        pxmldoc.write(mpxml)

        # leftover reference snippets (dead string literals in the original):
        """
        lOverlap = []
        for region in lRegions:
            lOverlap.append(self.signedRatioOverlap(region))
        if max(lOverlap) == 0: return None
        return lRegions[lOverlap.index(max(lOverlap))]
        """
        """
        fOverlap = 0.0
        if self.overlapX(zone) and self.overlapY(zone):
            [x11,y11,x12,y12] = [x1,y1,x1+w1,y1+h1]
            [x21,y21,x22,y22] = [x2,y2,x2+w2,y2+h2]
            s1 = w1 * h1
            # possible ?
            if s1 == 0: s1 = 1.0
            #intersection
            nx1 = max(x11,x21)
            nx2 = min(x12,x22)
            ny1 = max(y11,y21)
            ny2 = min(y12,y22)
            h = abs(nx2 - nx1)
            w = abs(ny2 - ny1)
            inter = h * w
            if inter > 0:
                fOverlap = inter/s1
            else:
                # if overX and Y this is not possible !
                fOverlap = 0.0
        return fOverlap
        """

    def extractFileNamesFromMPXML(self, mpxmldoc):
        """
        To ensure correct file order!
        (duplicated from performCVLLA.py)
        """
        xmlpath = os.path.abspath(os.path.join(self.coldir, TableProcessing.sCOL, self.docid))
        lNd = PageXml.getChildByName(mpxmldoc.getroot(), 'Page')
        # for nd in lNd: print(nd)
        return ["%s%s%s.xml" % (xmlpath, os.sep, nd.get('imageFilename')[:-4])
                for nd in lNd]

    def processDocument(self, coldir, colid, docid, dom=None):
        """
        Process a single document.

        1  python ../../src/xml_formats/PageXml.py trnskrbs_5400/col/17442 --ext=pxml
        2  python ../../src/tasks/performCVLLA.py --coldir=trnskrbs_5400/ --docid=17442 -i trnskrbs_5400/col/17442.mpxml --bl --regTL --form
        3  python ../../src/tasks/DU_ABPTable_T.py modelMultiType tableRow2 --run=trnskrbs_5400
        4  python ../../src/xml_formats/Page2DS.py --pattern=trnskrbs_5400/col/17442_du.mpxml -o trnskrbs_5400/xml/17442.ds_xml --docid=17442
        5  python src/IE_test.py -i trnskrbs_5400/xml/17442.ds_xml -o trnskrbs_5400/out/17442.ds_xml
        6  python ../../../TranskribusPyClient/src/TranskribusCommands/TranskribusDU_transcriptUploader.py --nodu trnskrbs_5400 5400 17442
        7  python ../../../TranskribusPyClient/src/TranskribusCommands/do_htrRnn.py <model-name> <dictionary-name> 5400 17442
           wait
        8  python ../../../TranskribusPyClient/src/TranskribusCommands/Transkribus_downloader.py 5400 --force
           # convert to ds
        9  python ../../src/xml_formats/Page2DS.py --pattern=trnskrbs_5400/col/17442.mpxml -o trnskrbs_5400/xml/17442.ds_xml --docid=17442
        10 python src/IE_test.py -i trnskrbs_5400/xml/17442.ds_xml -o trnskrbs_5400/out/17442.ds_xml --doie --usetemplate
        """
        # create the Transkribus client
        self.myTrKCient = TranskribusClient(sServerUrl=self.server, proxies={},
                                            loggingLevel=logging.WARN)
        # login
        _ = self.login(self.myTrKCient, trace=trace, traceln=traceln)

        # self.downloadCollection(colid, coldir, docid, bNoImg=False, bForce=True)

        ## load dom
        if dom is None:
            self.inputFileName = os.path.abspath(
                os.path.join(coldir, TableProcessing.sCOL,
                             docid + TableProcessing.sMPXMLExtension))
            mpxml_doc = self.loadDom()
            nbPages = MultiPageXml.getNBPages(mpxml_doc)
        else:
            # load provided mpxml
            mpxml_doc = dom
            nbPages = MultiPageXml.getNBPages(mpxml_doc)

        # ### table registration: need to compute/select??? the template
        # # perform LA separator, table registration, baseline with normalization
        # #python ../../src/tasks/performCVLLA.py --coldir=trnskrbs_5400/ --docid=17442 -i trnskrbs_5400/col/17442.mpxml --bl --regTL --form
        # tableregtool = LAProcessor()
        # # latool.setParams(dParams)
        # tableregtool.coldir = coldir
        # tableregtool.docid = docid
        # tableregtool.bTemplate, tableregtool.bSeparator, tableregtool.bBaseLine, tableregtool.bRegularTextLine = True, False, False, False
        # # creates xml and a new mpxml
        # mpxml_doc, nbPages = tableregtool.performLA(mpxml_doc)
        #
        # self.upLoadDocument(colid, coldir, docid, sNote='NLE workflow;table reg done')

        lJobIDs = self.applyLA_URO(colid, docid, nbPages)
        # NOTE: the workflow currently stops here; the code below is kept for reference
        return

        bWait = True
        assert lJobIDs != []
        jobid = lJobIDs[-1]
        traceln("waiting for job %s" % jobid)
        while bWait:
            dInfo = self.myTrKCient.getJobStatus(jobid)
            bWait = dInfo['state'] not in ['FINISHED', 'FAILED']

        ## coldir???
        self.downloadCollection(colid, coldir, docid, bNoImg=True, bForce=True)

        ## STOP HERE FOR DAS new testset:
        return

        # tag text for BIES cell
        # python ../../src/tasks/DU_ABPTable_T.py modelMultiType tableRow2 --run=trnskrbs_5400
        """
        needed: doer = DU_ABPTable_TypedCRF(sModelName, sModelDir,
        """
        doer = DU_ABPTable_TypedCRF(self.sRowModelName, self.sRowModelDir)
        doer.load()
        ## needed: predict at file level, and do not store the dom, but return it
        rowpath = os.path.join(coldir, "col")
        BIESFiles = doer.predict([rowpath], docid)
        BIESDom = self.loadDom(BIESFiles[0])
        # res = BIESDom.saveFormatFileEnc('test.mpxml', "UTF-8", True)

        # MPXML2DS
        # python ../../src/xml_formats/Page2DS.py --pattern=trnskrbs_5400/col/17442_du.mpxml -o trnskrbs_5400/xml/17442.ds_xml --docid=17442
        dsconv = primaAnalysis()
        DSBIESdoc = dsconv.convert2DS(BIESDom, self.docid)

        # create XMLDSDocument object
        self.ODoc = XMLDSDocument()
        self.ODoc.loadFromDom(DSBIESdoc)  # , listPages=range(self.firstPage, self.lastPage + 1))

        # create rows
        # python src/IE_test.py -i trnskrbs_5400/xml/17442.ds_xml -o trnskrbs_5400/out/17442.ds_xml
        rdc = RowDetection()
        rdc.findRowsInDoc(self.ODoc)

        # python ../../src/xml_formats/DS2PageXml.py -i trnskrbs_5400/out/17442.ds_xml --multi
        # DS2MPXML
        DS2MPXML = DS2PageXMLConvertor()
        lPageXml = DS2MPXML.run(self.ODoc.getDom())
        if lPageXml != []:
            # if DS2MPXML.bMultiPages:
            newDoc = MultiPageXml.makeMultiPageXmlMemory([xy[0] for xy in lPageXml])
            outputFileName = os.path.join(self.coldir, TableProcessing.sCOL,
                                          self.docid + TableProcessing.sMPXMLExtension)
            newDoc.write(outputFileName, xml_declaration=True, encoding="UTF-8",
                         pretty_print=True)
            # else:
            #     DS2MPXML.storePageXmlSetofFiles(lPageXml)

        return

        # upload
        # python ../../../TranskribusPyClient/src/TranskribusCommands/TranskribusDU_transcriptUploader.py --nodu trnskrbs_5400 5400 17442
        self.upLoadDocument(colid, coldir, docid, sNote='NLE workflow;table row done')

        ## apply HTR
        ## how to deal with specific dictionaries?
        ## here we need to know the ontology and the template
        ## OPTION: put it after LA on server (just one download needed)
        nbPages = 1
        jobid = self.applyHTR(colid, docid, nbPages, self.sHTRmodel, self.sDictName)
        bWait = True
        traceln("waiting for job %s" % jobid)
        while bWait:
            dInfo = self.myTrKCient.getJobStatus(jobid)
            bWait = dInfo['state'] not in ['FINISHED', 'FAILED', 'CANCELED']

        # download where???
        # python ../../../TranskribusPyClient/src/TranskribusCommands/Transkribus_downloader.py 5400 --force
        # coldir is not right!! coldir must refer to the parent folder!
        self.downloadCollection(colid, coldir, docid, bNoImg=True, bForce=True)
        # done!!
        # IE extraction
        ## not here: specific to a use case
        # python src/IE_test.py -i trnskrbs_5400/xml/17442.ds_xml -o trnskrbs_5400/out/17442.ds_xml --doie --usetemplate

    def processCollection(self, coldir):
        """
        Process all files in a collection.
        Needs the mpxml files.
        """
        lsDocFilename = sorted(glob.iglob(
            os.path.join(coldir, "*" + TableProcessing.sMPXMLExtension)))
        lDocId = []
        for sDocFilename in lsDocFilename:
            sDocId = os.path.basename(sDocFilename)[:-len(TableProcessing.sMPXMLExtension)]
            try:
                docid = int(sDocId)
                lDocId.append(docid)
            except ValueError:
                traceln("Warning: folder %s : %s invalid docid, IGNORING IT"
                        % (self.coldir, sDocId))
                continue

        # process each document
        for docid in lDocId:
            traceln("Processing %s : %s " % (self.coldir, docid))
            self.processDocument(self.coldir, self.colid, docid)
            traceln("\tProcessing done for %s : %s " % (self.coldir, docid))

    def processParameters(self):
        """
        What to do with the parameters provided on the command line.
        """
        if self.colid is None:
            print('collection id missing!')
            sys.exit(1)

        # process the full collection only when no document id is given
        self.bFullCol = self.docid is None

        if self.bRegenerateMPXML and self.docid is not None:
            l = glob.glob(os.path.join(self.coldir, TableProcessing.sCOL,
                                       self.docid, "*.pxml"))
            doc = MultiPageXml.makeMultiPageXml(l)
            outputFileName = os.path.join(self.coldir, TableProcessing.sCOL,
                                          self.docid + TableProcessing.sMPXMLExtension)
            doc.write(outputFileName, xml_declaration=True, encoding="UTF-8",
                      pretty_print=True)
            return doc
        return None

    def run(self):
        """
        Process at collection level or document level.
        """
        newMPXML = self.processParameters()
        if self.bFullCol:
            self.processCollection(self.coldir)
        else:
            if self.bUROCVLMerge:
                self.mergeBaselineCells(self.coldir, self.colid, self.docid)
                return
            self.processDocument(self.coldir, self.colid, self.docid, newMPXML)
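# --------------------------------------------------------------------------
# A minimal driver sketch for TableProcessing (not part of the original code).
# The parameter keys are the ones read in setParams(); the collection/document
# ids and model names come from the docstring examples above; the server URL
# and the __main__ wiring are assumptions -- the real script builds dParams
# from its Component option parser.
if __name__ == "__main__":
    tp = TableProcessing()
    dParams = {
        "server": "https://transkribus.eu/TrpServer",  # assumed default server URL
        "coldir": "trnskrbs_5400",                     # local collection folder
        "colid": "5400",
        "docid": "17442",
        "rowmodelname": "modelMultiType",
        "rowmodeldir": "tableRow2",
        "htrmodel": "<model-name>",
        "dictname": "<dictionary-name>",
        "persist": True,                               # reuse a persistent session if possible
    }
    tp.setParams(dParams)
    tp.run()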
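# --------------------------------------------------------------------------
# The wait loops in processDocument() poll getJobStatus() in a tight busy loop.
# A gentler polling helper could look like this sketch: the job-state names are
# the ones tested above, while the helper name and the sleep interval are
# assumptions.
import time

def waitForJob(client, jobid, poll_s=5):
    """Poll a Transkribus job until it leaves the running states."""
    while True:
        dInfo = client.getJobStatus(jobid)
        if dInfo['state'] in ['FINISHED', 'FAILED', 'CANCELED']:
            return dInfo['state']
        time.sleep(poll_s)  # avoid hammering the server between polls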
import sys, os
import logging

try:  # to ease the use without proper Python installation
    import TranskribusPyClient_version
except ImportError:
    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(sys.argv[0]))))
    import TranskribusPyClient_version

from TranskribusPyClient.test import _colId_A
from TranskribusPyClient.client import TranskribusClient, getStoredCredentials

login, pwd = getStoredCredentials()

conn = TranskribusClient(proxies={'https': 'http://cornillon:8000'},
                         loggingLevel=logging.INFO)
sessionID = conn.auth_login(login, pwd)

doc = conn.listEditDeclFeatures(_colId_A)
doc.saveFormatFileEnc("-", "UTF-8", True)
conn.xmlFreeDoc(doc)

print(conn.auth_logout())

"""
Expected output (truncated in the source snippet):
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<edFeatures>
    <edFeature>
        <featureId>1</featureId>
        <title>Long S</title>
        <description>Source uses long "s"</description>
        ...
"""
# -*- coding: utf-8 -*-

import sys, os
# optional: useful if you want to set the logging level to something other than logging.WARN
import logging

try:  # to ease the use without proper Python installation
    import TranskribusPyClient_version
except ImportError:
    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(sys.argv[0]))))
    import TranskribusPyClient_version

from TranskribusPyClient.test import _coldId_Sandbox, _docId_a
from TranskribusPyClient.client import TranskribusClient, getStoredCredentials

login, pwd = getStoredCredentials()

conn = TranskribusClient(proxies={'https': 'http://cornillon:8000'},
                         loggingLevel=logging.INFO)
sessionID = conn.auth_login(login, pwd)

data = conn.addDocToCollection(_coldId_Sandbox, _docId_a)
"""
True or Exception
"""

print(conn.auth_logout())
# -*- coding: utf-8 -*-

import sys, os
# optional: useful if you want to set the logging level to something other than logging.WARN
import logging

try:  # to ease the use without proper Python installation
    import TranskribusPyClient_version
except ImportError:
    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(sys.argv[0]))))
    import TranskribusPyClient_version

from TranskribusPyClient.test import _colId_A, _coldId_Sandbox, _docId_c, _docId_d
from TranskribusPyClient.client import TranskribusClient, getStoredCredentials

login, pwd = getStoredCredentials()

conn = TranskribusClient(proxies={'https': 'http://cornillon:8000'},
                         loggingLevel=logging.INFO)
sessionID = conn.auth_login(login, pwd)

data = conn.duplicateDoc(_colId_A, _docId_c, _coldId_Sandbox, "named_by_JL")
data = conn.duplicateDoc(_colId_A, _docId_d, _coldId_Sandbox)
"""
True or Exception
"""

print(conn.auth_logout())
import sys, os
import logging

try:  # to ease the use without proper Python installation
    import TranskribusPyClient_version
except ImportError:
    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(sys.argv[0]))))
    import TranskribusPyClient_version

from TranskribusPyClient.test import _colId_A, _docId_a
from TranskribusPyClient.client import TranskribusClient, getStoredCredentials

login, pwd = getStoredCredentials()

# the proxy/credential string is redacted and garbled in the source snippet:
# conn = TranskribusClient(proxies={'https': 'http://*****:*****@xrce.xerox.com'}, ...)
conn = TranskribusClient(proxies={'https': 'http://cornillon:8000'},
                         loggingLevel=logging.INFO)
sessionID = conn.auth_login(login, pwd)

"""
Sample of the returned document metadata (truncated in the source snippet):
    ... u'createdFromTimestamp': 33175290, u'createdToTimestamp': 33175290, ...
"""
import sys, os
import logging

try:  # to ease the use without proper Python installation
    import TranskribusPyClient_version
except ImportError:
    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(sys.argv[0]))))
    import TranskribusPyClient_version

from TranskribusPyClient.test import _colId_A, _docId_a
from TranskribusPyClient.client import TranskribusClient, getStoredCredentials

login, pwd = getStoredCredentials()

# the proxy/credential arguments are redacted and garbled in the source snippet:
# conn = TranskribusClient(proxies={'https': 'http://*****:*****@xrce.xerox.com", "trnjluc", sHttpsProxyUrl='http://cornillon:8000')
conn = TranskribusClient(proxies={'https': 'http://cornillon:8000'},
                         loggingLevel=logging.INFO)
sessionID = conn.auth_login(login, pwd)

# ret = conn.getDocumentFromServer(colid, docid)
# ret = conn.getDocumentFromServer("3571", "7750")
data = conn.getDocByIdAsXml(_colId_A, str(_docId_a))  # str() just to stress-test
# data = conn.getDocByIdAsXml(3571, "7750")
print(data)

"""
(sample output truncated in the source snippet)
"""