def doSteps(self, doc, task, workflow, steps, **kw):
    """Run the given workflow steps on doc via the CGI endpoint.

    doc: the annotated document to serialize and send.
    task, workflow: names passed through to the server.
    steps: a comma-separated string, or a list/tuple of step names.
    Returns a new AnnotatedDoc built from the last successful step.
    Raises WebClientError if the server reports an error, returns no
    successes, or returns something that isn't JSON.
    """
    data = kw.copy()
    data["task"] = task
    data["workflow"] = workflow
    # The server expects a comma-separated string of step names.
    if isinstance(steps, (tuple, list)):
        steps = ",".join(steps)
    data["steps"] = steps
    data["operation"] = "steps"
    data["file_type"] = "mat-json"
    data["input"] = _jsonIO.writeToByteSequence(doc)
    fp = urllib.urlopen(self.url, urllib.urlencode(data), self.proxies)
    # Make sure the connection is closed even if the read fails.
    try:
        s = fp.read()
    finally:
        fp.close()
    # The string will be a JSON string, or it should be.
    try:
        d = json.loads(s)
    except ValueError:
        raise WebClientError("CGI response isn't a JSON object")
    err = d.get("error")
    if err:
        raise WebClientError("Step %s failed: %s" % (d.get("errorStep"), err))
    successes = d.get("successes")
    if not successes:
        raise WebClientError("No error, but no successful document either")
    # The last entry reflects the cumulative result of all the steps.
    finalSuccess = successes[-1]
    seedDocument = AnnotatedDoc()
    _jsonIO._deserializeFromJSON(finalSuccess["val"], seedDocument)
    return seedDocument
def _handleComment(self, data): m = self.METADATA_PAT.match(data) if m is None: if self.inputIsOverlay: # Pass through the comment. self._addSignalChunk("<!--") self._addSignalChunk(data) self._addSignalChunk("-->") else: # We've got the metadata. jsonMetadata = base64.b64decode(m.group(1)) self.annotDoc.metadata = json.loads(jsonMetadata)
def document_comparison(self, labels=None, **kw):
    """CGI operation: build a comparison document from a list of documents.

    labels, if given, supplies display labels: the first for the pivot
    (first) document, the rest for the remaining documents. The kw args
    carry the CGI parameters; INPUT must be a UTF-8 JSON string which is
    a LIST of document JSON objects.
    Returns a result dict with "success", "error" and, on success, "doc"
    (the rendered comparison document).
    """
    result = {"success": True, "error": None}
    success, errStr, res = self._checkTaskInformation(
        [], workflowCanBeNull=True, **kw)
    if not success:
        result["success"] = False
        result["error"] = errStr
    else:
        plugins, pDir, TASK_OBJ, INPUT, WORKFLOW = res
        # INPUT is a string which is a LIST of document JSON objects.
        from MAT import json
        docs = None
        try:
            docs = []
            for d in json.loads(INPUT.decode('utf-8')):
                doc = TASK_OBJ.newDocument()
                docs.append(doc)
                _jsonIO._deserializeFromJSON(d, doc)
        except MAT.Document.LoadError as e:
            result["success"] = False
            result["error"] = str(e)
            # Bug fix: a mid-loop failure left docs as a partial non-None
            # list, so the comparison was attempted despite the error.
            docs = None
        if docs is not None:
            # So here, what we do is create a comparison document.
            from MAT.ComparisonDocument import generateComparisonDocument
            pivotLabel = None
            otherLabels = None
            if labels:
                pivotLabel = labels[0]
                otherLabels = labels[1:]
            try:
                compDoc = generateComparisonDocument(
                    TASK_OBJ, docs[0], docs[1:],
                    pivotLabel=pivotLabel, otherLabels=otherLabels)
                result["doc"] = _jsonIO.renderJSONObj(compDoc)
            except MAT.Pair.PairingError as e:
                result["success"] = False
                result["error"] = str(e)
    # NOTE(review): the original had no return; the sibling CGI operation
    # save_log returns its result dict, so this looks like a dropped line.
    return result
def document_reconciliation(self, **kw):
    """CGI operation: build a reconciliation document from a list of documents.

    kw carries the CGI parameters; INPUT must be a UTF-8 JSON string
    which is a LIST of document JSON objects. Every input document is
    forced to "human gold" status with the document itself ("docN") as
    the annotator, and in the resulting reconciliation document every
    human gold segment is marked "to review".
    Returns a result dict with "success", "error" and, on success, "doc".
    """
    result = {"success": True, "error": None}
    success, errStr, res = self._checkTaskInformation(
        [], workflowCanBeNull=True, **kw)
    if not success:
        result["success"] = False
        result["error"] = errStr
    else:
        plugins, pDir, TASK_OBJ, INPUT, WORKFLOW = res
        # INPUT is a string which is a LIST of document JSON objects.
        from MAT import json
        docs = None
        try:
            docs = []
            for d in json.loads(INPUT.decode('utf-8')):
                doc = TASK_OBJ.newDocument()
                docs.append(doc)
                _jsonIO._deserializeFromJSON(d, doc)
        except MAT.Document.LoadError as e:
            result["success"] = False
            result["error"] = str(e)
            # Bug fix: a mid-loop failure left docs as a partial non-None
            # list, so reconciliation was attempted despite the error.
            docs = None
        if docs is not None:
            # Create a reconciliation document. We need to decide which
            # portions of the incoming documents count as "gold": we may
            # want an option to preserve the incoming segmentation, and
            # otherwise assign a single document-size segment with the
            # document itself as the annotator. Open design questions:
            # whether to save this directly as a reconciliation document
            # or export it as a reconciled document, and how loading
            # should open a reconciliation pane (which would have to
            # happen in the load callback, not the load prep).
            from MAT.ReconciliationDocument import ReconciliationDoc
            # Preprocess the documents: all of them must be marked human
            # gold, with the document itself as the annotator.
            i = 1
            wholeZoneStep = None
            for doc in docs:
                annotator = "doc" + str(i)
                i += 1
                segs = doc.getAnnotations(["SEGMENT"])
                if not segs:
                    zones = doc.getAnnotations(
                        TASK_OBJ.getAnnotationTypesByCategory("zone"))
                    if zones:
                        # Segment the existing zones.
                        for z in zones:
                            doc.createAnnotation(
                                z.start, z.end, "SEGMENT",
                                {"annotator": annotator, "status": "human gold"})
                    else:
                        # No zones and no segments: make one big zone and
                        # segment via the whole-zone step.
                        if not wholeZoneStep:
                            wholeZoneStep = MAT.PluginMgr.WholeZoneStep(
                                "zone", TASK_OBJ, None)
                        wholeZoneStep.do(doc)
                        for seg in doc.getAnnotations(["SEGMENT"]):
                            seg["annotator"] = annotator
                            seg["status"] = "human gold"
                else:
                    for seg in segs:
                        seg["annotator"] = annotator
                        seg["status"] = "human gold"
            recDoc = ReconciliationDoc.generateReconciliationDocument(
                TASK_OBJ, docs, verbose=None)
            # Only the human gold segments get marked "to review".
            for seg in recDoc.getAnnotations(["SEGMENT"]):
                if seg["status"] == "human gold":
                    seg["to_review"] = "yes"
            result["doc"] = _jsonIO.renderJSONObj(recDoc)
    # NOTE(review): the original had no return; the sibling CGI operation
    # save_log returns its result dict, so this looks like a dropped line.
    return result
def save_log(self, log=None, **kw):
    """CGI operation: convert a frontend JSON activity log into CSV bytes.

    log is a JSON string containing a list of log entries. Our own
    entries are dicts with an "ms" millisecond timestamp plus details
    (gesture, file, etc.); the Yahoo logger also contributes entries
    that are plain strings, which are skipped.
    Returns {"success": True, "bytes": <csv text>, "filename": <name>}.
    """
    # I'm going to do the log mangling here, because it's really not
    # relevant to anything else in the system. Originally some raw
    # logging happened in the frontend and was augmented here, but that
    # was unwieldy, so the frontend does the ugly part and here we just
    # turn it into CSV.
    from MAT import json
    log = json.loads(_getfirst(log))
    import datetime
    fname = datetime.datetime.now().strftime("log_%Y%m%d_%H_%M_%S.csv")
    convertedLogs = []
    headers = [
        "timestamp", "rel_seconds", "gesture", "file", "folder",
        "workspace", "window", "action"
    ]
    extraHeaders = []
    startTime = None
    import time
    for msg in log:
        # Bug fix: only our own entries are dicts; the raw strings the
        # Yahoo logger emits for its own messages must be skipped (the
        # original indexed them with msg["ms"] and raised TypeError).
        if not isinstance(msg, dict):
            continue
        # Avoid float overflow ugliness: do the arithmetic in ms, then
        # move the decimal point by hand.
        t = msg["ms"]
        if startTime is None:
            startTime = t
            diffTime = "0.0"
        else:
            # Ensure the relative time is consistently marked in ms.
            diffTime = "%04d" % (t - startTime, )
            diffTime = diffTime[:-3] + "." + diffTime[-3:]
        remainderStr = "%.03f" % (float(t) / 1000.0, )
        remainder = remainderStr[remainderStr.find("."):]
        # Excel barfs on a time string with a space in it, so use the
        # ISO "T" separator.
        ts = time.strftime("%Y-%m-%dT%H:%M:%S",
                           time.gmtime(float(t) / 1000.0)) + remainder
        # Get rid of the milliseconds; timestamp/rel_seconds replace it.
        del msg["ms"]
        msg["timestamp"] = ts
        msg["rel_seconds"] = diffTime
        for key in msg.keys():
            if (key not in headers) and (key not in extraHeaders):
                extraHeaders.append(key)
        convertedLogs.append(msg)
    extraHeaders.sort()
    # Prepend a header row mapping each column name to itself.
    hDict = dict([(a, a) for a in headers + extraHeaders])
    convertedLogs[0:0] = [hDict]
    import csv, cStringIO
    output = cStringIO.StringIO()
    # restval "" fills columns a given entry doesn't have.
    csv.DictWriter(output, headers + extraHeaders, "").writerows(convertedLogs)
    v = output.getvalue()
    output.close()
    return {"success": True, "bytes": v, "filename": fname}
def rollForward(self, stopAt = None, verbose = False):
    """Replay logged workspace transactions against the current workspace.

    Reads the event log (one JSON transaction per line, in order). If a
    state has been recorded, entries up to and including that timestamp
    are skipped; replay stops just before the stopAt timestamp, if given.
    For each transaction: replay the file actions, apply the DB updates
    inside a single DB transaction, record the new state, and run the
    transaction-final operations.
    """
    # The entries in the DB log will be in order, one entry per transaction.
    # Bug fix: use a dedicated name for the event-log handle; the original
    # reused "fp" for the per-action files, so the event log was never
    # actually closed at the end.
    logFp = codecs.open(os.path.join(self.wsLog, "event_log"), "r", "utf-8")
    db = self.ws.getDB()
    currentTransaction = None
    oldState = self._readState()
    skip = (oldState is not None)
    if verbose:
        self._reportRerunState()
    _jsonIO = MAT.DocumentIO.getDocumentIO('mat-json', task = self.ws.task)
    for line in logFp.readlines():
        jDict = json.loads(line.strip())
        if skip:
            # Skip everything up to and including the recorded state.
            if jDict["timestamp"] == oldState:
                skip = False
            continue
        if jDict["timestamp"] == stopAt:
            break
        p = os.path.join(self.wsLog, jDict["timestamp"])
        # The file actions are in order.
        for a in jDict["actions"]:
            if a[0] == "save":
                fName, count, docName = a[1:]
                saveFp = codecs.open(
                    os.path.join(p, "saves", "%s_%d_%s" % (fName, count, docName)),
                    "r", "utf-8")
                frag = json.loads(saveFp.read())
                saveFp.close()
                # The document will already be there, because assign
                # copies and removes first.
                docPath = os.path.join(self.ws.folders[fName].dir, docName)
                docFp = codecs.open(docPath, "r", "utf-8")
                docJson = json.loads(docFp.read())
                docFp.close()
                # Update the asets: the fragment's annotation sets
                # override the document's, keyed by type.
                # (Renamed the comprehension variable, which shadowed
                # the action loop variable "a" in the original.)
                d = dict([(aset["type"], aset) for aset in docJson["asets"]])
                for aset in frag["asets"]:
                    d[aset["type"]] = aset
                docJson["asets"] = d.values()
                docFp = codecs.open(docPath, "w", "utf-8")
                docFp.write(json.dumps(docJson, ensure_ascii = False))
                # Bug fix: the original never closed the write handle,
                # so the codecs stream was left unflushed until GC.
                docFp.close()
            elif a[0] == "import":
                fName, count, basenames = a[1:]
                for b in basenames:
                    shutil.copyfile(
                        os.path.join(p, "imports", "%s_%d_%s" % (fName, count, b)),
                        os.path.join(self.ws.folders[fName].dir, b))
            elif a[0] == "remove":
                fName, count, trueBasename = a[1:]
                os.remove(os.path.join(self.ws.folders[fName].dir, trueBasename))
            elif a[0] == "move":
                fName, count, targetFolderName, trueBasenames = a[1:]
                tFolder = self.ws.folders[targetFolderName]
                sFolder = self.ws.folders[fName]
                for b in trueBasenames:
                    shutil.move(os.path.join(sFolder.dir, b),
                                os.path.join(tFolder.dir, b))
            elif a[0] == "copy":
                sName, count, sourceBasename, tName, targetBasename = a[1:]
                shutil.copyfile(
                    os.path.join(self.ws.folders[sName].dir, sourceBasename),
                    os.path.join(self.ws.folders[tName].dir, targetBasename))
            elif a[0] == "enable_prioritization":
                # Prioritization toggles replace the workspace DB handle.
                self.ws._db = db
                self.ws._enablePrioritization(a[2])
                db = self.ws.getDB()
            elif a[0] == "disable_prioritization":
                self.ws._db = db
                self.ws._disablePrioritization()
                db = self.ws.getDB()
        # Replay this transaction's DB updates atomically.
        db.beginTransaction()
        for q, args, isMany in jDict["db_updates"]:
            db._execute(q, params = args, many = isMany, retrieval = False)
        db.commitTransaction()
        self._writeState(jDict["timestamp"])
        if verbose:
            self._reportRerunState()
        # transaction-final operations.
        self._doTransactionOperations(jDict)
    self.ws.closeDB()
    logFp.close()