Example #1
0
 def doSteps(self, doc, task, workflow, steps, **kw):
     data = kw.copy()
     data["task"] = task
     data["workflow"] = workflow
     # This is a comma-separated string.
     if (type(steps) is tuple) or (type(steps) is list):
         steps = ",".join(steps)
     data["steps"] = steps
     data["operation"] = "steps"
     data["file_type"] = "mat-json"
     data["input"] = _jsonIO.writeToByteSequence(doc)
     fp = urllib.urlopen(self.url, urllib.urlencode(data), self.proxies)
     s = fp.read()
     fp.close()
     # The string will be a JSON string, or it should be.
     try:
         d = json.loads(s)
     except ValueError:
         raise WebClientError, "CGI response isn't a JSON object"
     err = d.get("error")
     if err:
         raise WebClientError, ("Step %s failed: %s" %
                                (d.get("errorStep"), err))
     successes = d.get("successes")
     if not successes:
         raise WebClientError, "No error, but no successful document either"
     # Get the last one.
     finalSuccess = successes[-1]
     seedDocument = AnnotatedDoc()
     _jsonIO._deserializeFromJSON(finalSuccess["val"], seedDocument)
     return seedDocument
Example #2
0
File: XMLIO.py  Project: govtmirror/DmD
 def _handleComment(self, data):
     """Process an XML comment encountered during parsing.

     A comment matching METADATA_PAT carries base64-encoded JSON document
     metadata, which is decoded and installed on the annotated document.
     Any other comment is passed through to the signal unchanged, but only
     when the input is an overlay.
     """
     match = self.METADATA_PAT.match(data)
     if match:
         # Metadata comment: decode the base64 payload and parse the JSON.
         self.annotDoc.metadata = json.loads(base64.b64decode(match.group(1)))
     elif self.inputIsOverlay:
         # Ordinary comment: reconstruct it in the signal output.
         for piece in ("<!--", data, "-->"):
             self._addSignalChunk(piece)
Example #3
0
    def document_comparison(self, labels=None, **kw):
        """Build a comparison document from a list of JSON documents.

        labels: optional sequence; labels[0] is used as the pivot label and
            the remainder as the other labels for the comparison generator.
        **kw: CGI-style parameters forwarded to _checkTaskInformation.

        Populates a result dict with "success"/"error" keys, plus "doc"
        (the rendered comparison document) on success.
        NOTE(review): no return statement is visible in this chunk; the
        result dict is presumably returned by code outside this view —
        confirm against the full source.
        """

        result = {"success": True, "error": None}

        # Resolve task/workflow/input from the request parameters.
        success, errStr, res = self._checkTaskInformation(
            [], workflowCanBeNull=True, **kw)

        if not success:
            result["success"] = False
            result["error"] = errStr

        else:

            plugins, pDir, TASK_OBJ, INPUT, WORKFLOW = res
            # INPUT is a string which is a LIST of document JSON objects.
            from MAT import json
            docs = None
            try:
                docs = []
                for d in json.loads(INPUT.decode('utf-8')):
                    doc = TASK_OBJ.newDocument()
                    docs.append(doc)
                    _jsonIO._deserializeFromJSON(d, doc)
            except MAT.Document.LoadError, e:
                # A failed deserialization aborts the whole comparison;
                # note that docs remains a (possibly partial) list here,
                # so the guard below relies on the exception path only
                # when docs is still None.
                result["success"] = False
                result["error"] = str(e)

            if docs is not None:
                # So here, what we do is create a comparison document.
                from MAT.ComparisonDocument import generateComparisonDocument
                pivotLabel = None
                otherLabels = None
                if labels:
                    # First label is the pivot; the rest are compared to it.
                    pivotLabel = labels[0]
                    otherLabels = labels[1:]
                try:
                    compDoc = generateComparisonDocument(
                        TASK_OBJ,
                        docs[0],
                        docs[1:],
                        pivotLabel=pivotLabel,
                        otherLabels=otherLabels)

                    result["doc"] = _jsonIO.renderJSONObj(compDoc)
                except MAT.Pair.PairingError, e:
                    result["success"] = False
                    result["error"] = str(e)
Example #4
0
    def document_reconciliation(self, **kw):
        """Build a reconciliation document from a list of JSON documents.

        **kw: CGI-style parameters forwarded to _checkTaskInformation;
        the resolved INPUT is a JSON list of document objects.

        Each input document is forced into a "human gold" segmentation
        (segmenting by zones, or by one whole-document zone, if it has no
        SEGMENTs), then the documents are merged into a reconciliation
        document whose human-gold segments are flagged "to_review".

        Populates a result dict with "success"/"error" keys, plus "doc"
        (the rendered reconciliation document) on success.
        NOTE(review): no return statement is visible in this chunk; the
        result dict is presumably returned by code outside this view —
        confirm against the full source.
        """

        result = {"success": True, "error": None}

        # Resolve task/workflow/input from the request parameters.
        success, errStr, res = self._checkTaskInformation(
            [], workflowCanBeNull=True, **kw)

        if not success:
            result["success"] = False
            result["error"] = errStr

        else:

            plugins, pDir, TASK_OBJ, INPUT, WORKFLOW = res
            # INPUT is a string which is a LIST of document JSON objects.
            from MAT import json
            docs = None
            try:
                docs = []
                for d in json.loads(INPUT.decode('utf-8')):
                    doc = TASK_OBJ.newDocument()
                    docs.append(doc)
                    _jsonIO._deserializeFromJSON(d, doc)
            except MAT.Document.LoadError, e:
                result["success"] = False
                result["error"] = str(e)

            if docs is not None:
                # So here, what we do is create a reconciliation document. The issue with
                # this is that we need to figure out which portions of the incoming
                # documents should be considered "gold". We may want an option to
                # preserve the incoming segmentation, and otherwise just assign a single
                # document-size segment with the document itself as the annotator.

                # We'll have two options to save: either to save this directly as
                # a reconciliation document, or to export it as a reconciled document.
                # The other issue is what happens when we load - the reconciliation document
                # should automatically open a reconciliation pane, which means that
                # we'd need to deal with the panes in the load callback, rather than
                # in the load prep. But otherwise, how do I load a reconciliation document?
                # I'd need a separate menu item. Hmmm.
                from MAT.ReconciliationDocument import ReconciliationDoc

                # Preprocess the documents. In this case, all the documents
                # must be marked human gold, and the annotator should be
                # the document itself.
                i = 1
                # Lazily-created step for documents with no zones at all.
                wholeZoneStep = None
                for doc in docs:
                    # Each document acts as its own annotator: doc1, doc2, ...
                    annotator = "doc" + str(i)
                    i += 1
                    segs = doc.getAnnotations(["SEGMENT"])
                    if not segs:
                        zones = doc.getAnnotations(
                            TASK_OBJ.getAnnotationTypesByCategory("zone"))
                        if zones:
                            # Segment it.
                            for z in zones:
                                doc.createAnnotation(
                                    z.start, z.end, "SEGMENT", {
                                        "annotator": annotator,
                                        "status": "human gold"
                                    })
                        else:
                            # If there are no zones and no segments, then make one big zone and segment.
                            if not wholeZoneStep:
                                wholeZoneStep = MAT.PluginMgr.WholeZoneStep(
                                    "zone", TASK_OBJ, None)
                            wholeZoneStep.do(doc)
                            for seg in doc.getAnnotations(["SEGMENT"]):
                                seg["annotator"] = annotator
                                seg["status"] = "human gold"
                    else:
                        # Existing segments: just relabel them as this
                        # document's human-gold annotations.
                        for seg in segs:
                            seg["annotator"] = annotator
                            seg["status"] = "human gold"

                recDoc = ReconciliationDoc.generateReconciliationDocument(
                    TASK_OBJ, docs, verbose=None)

                # Now, every segment in the reconciliation doc should be marked "to review".
                # NO! Only the segments which are human gold.
                for seg in recDoc.getAnnotations(["SEGMENT"]):
                    if seg["status"] == "human gold":
                        seg["to_review"] = "yes"

                result["doc"] = _jsonIO.renderJSONObj(recDoc)
Example #5
0
    def save_log(self, log=None, **kw):
        """Convert the frontend's JSON activity log into CSV bytes.

        log: a CGI parameter holding a JSON list of log-entry hashes, each
        with an "ms" millisecond timestamp plus gesture/file/etc. details.

        Returns {"success": True, "bytes": <csv text>, "filename": <name>}.
        """
        # All the log mangling lives here because it's irrelevant to the
        # rest of the system; the frontend produces the raw entries and we
        # just turn them into CSV.
        from MAT import json
        import datetime, time, csv, cStringIO

        entries = json.loads(_getfirst(log))
        fname = datetime.datetime.now().strftime("log_%Y%m%d_%H_%M_%S.csv")

        # Fixed columns first; any other keys found in the entries are
        # collected as extra columns.
        headers = [
            "timestamp", "rel_seconds", "gesture", "file", "folder",
            "workspace", "window", "action"
        ]
        extraHeaders = []
        rows = []
        firstMs = None

        # NOTE: msg entries are only objects for the elements we saved; the
        # Yahoo logger also emits plain strings.
        for entry in entries:
            # Work in integer milliseconds to dodge float overflow, then
            # place the decimal point by string surgery.
            ms = entry["ms"]
            if firstMs is None:
                firstMs = ms
                relSeconds = "0.0"
            else:
                # Zero-pad so the last three digits are always the ms part.
                padded = "%04d" % (ms - firstMs, )
                relSeconds = padded[:-3] + "." + padded[-3:]
            fracStr = "%.03f" % (float(ms) / 1000.0, )
            frac = fracStr[fracStr.find("."):]
            # Excel barfs on a timestamp containing a space, hence the "T".
            stamp = time.strftime("%Y-%m-%dT%H:%M:%S",
                                  time.gmtime(float(ms) / 1000.0)) + frac
            # Replace the raw ms field with the derived columns.
            del entry["ms"]
            entry["timestamp"] = stamp
            entry["rel_seconds"] = relSeconds

            for k in entry.keys():
                if (k not in headers) and (k not in extraHeaders):
                    extraHeaders.append(k)

            rows.append(entry)

        extraHeaders.sort()
        allHeaders = headers + extraHeaders

        # Prepend a header row (each column named by itself).
        rows.insert(0, dict(zip(allHeaders, allHeaders)))

        buf = cStringIO.StringIO()
        # restval "" fills columns a given entry doesn't have.
        csv.DictWriter(buf, allHeaders, "").writerows(rows)
        payload = buf.getvalue()
        buf.close()
        return {"success": True, "bytes": payload, "filename": fname}
Example #6
0
 def rollForward(self, stopAt = None, verbose = False):
     """Replay the workspace event log, bringing folders and DB forward.

     stopAt: a log timestamp string; replay halts when that entry is
         reached (that entry itself is not applied).
     verbose: if True, report the rerun state after each transaction.

     Each line of the event log is a JSON record for one transaction, with
     "timestamp", "actions" (ordered file operations) and "db_updates"
     entries. Transactions up to the recorded state are skipped.
     """
     # Fixes vs. the earlier version: the inner file handles no longer
     # reuse the event-log handle's name (previously the trailing
     # fp.close() closed whichever file happened to be opened last, and
     # the event log itself leaked), the rewritten document file is now
     # explicitly closed, and cleanup happens in a finally clause.
     logFp = codecs.open(os.path.join(self.wsLog, "event_log"), "r", "utf-8")
     db = self.ws.getDB()
     oldState = self._readState()
     # If we have a recorded rerun state, skip entries until we pass it.
     skip = (oldState is not None)
     if verbose:
         self._reportRerunState()
     _jsonIO = MAT.DocumentIO.getDocumentIO('mat-json', task = self.ws.task)
     try:
         for line in logFp.readlines():
             jDict = json.loads(line.strip())
             if skip:
                 if jDict["timestamp"] == oldState:
                     skip = False
                 continue
             if jDict["timestamp"] == stopAt:
                 break
             p = os.path.join(self.wsLog, jDict["timestamp"])
             # The file actions are in order.
             for a in jDict["actions"]:
                 if a[0] == "save":
                     fName, count, docName = a[1:]
                     fragFp = codecs.open(os.path.join(p, "saves", "%s_%d_%s" % (fName, count, docName)), "r", "utf-8")
                     frag = json.loads(fragFp.read())
                     fragFp.close()
                     # The document will already be there, because assign copies and
                     # removes first.
                     docFp = codecs.open(os.path.join(self.ws.folders[fName].dir, docName), "r", "utf-8")
                     docJson = json.loads(docFp.read())
                     docFp.close()
                     # Now, update the asets. (Distinct loop variable: the
                     # Python 2 list comprehension would otherwise clobber
                     # the action variable 'a'.)
                     d = dict([(aset["type"], aset) for aset in docJson["asets"]])
                     for aset in frag["asets"]:
                         d[aset["type"]] = aset
                     docJson["asets"] = d.values()
                     outFp = codecs.open(os.path.join(self.ws.folders[fName].dir, docName), "w", "utf-8")
                     outFp.write(json.dumps(docJson, ensure_ascii = False))
                     outFp.close()
                 elif a[0] == "import":
                     fName, count, basenames = a[1:]
                     for b in basenames:
                         shutil.copyfile(os.path.join(p, "imports", "%s_%d_%s" % (fName, count, b)),
                                         os.path.join(self.ws.folders[fName].dir, b))
                 elif a[0] == "remove":
                     fName, count, trueBasename = a[1:]
                     os.remove(os.path.join(self.ws.folders[fName].dir, trueBasename))
                 elif a[0] == "move":
                     fName, count, targetFolderName, trueBasenames = a[1:]
                     tFolder = self.ws.folders[targetFolderName]
                     sFolder = self.ws.folders[fName]
                     for b in trueBasenames:
                         shutil.move(os.path.join(sFolder.dir, b), os.path.join(tFolder.dir, b))
                 elif a[0] == "copy":
                     sName, count, sourceBasename, tName, targetBasename = a[1:]
                     shutil.copyfile(os.path.join(self.ws.folders[sName].dir, sourceBasename),
                                     os.path.join(self.ws.folders[tName].dir, targetBasename))
                 elif a[0] == "enable_prioritization":
                     # Toggling prioritization replaces the DB handle, so
                     # refresh our local reference afterward.
                     self.ws._db = db
                     self.ws._enablePrioritization(a[2])
                     db = self.ws.getDB()
                 elif a[0] == "disable_prioritization":
                     self.ws._db = db
                     self.ws._disablePrioritization()
                     db = self.ws.getDB()
             # Apply the transaction's DB updates atomically.
             db.beginTransaction()
             for q, args, isMany in jDict["db_updates"]:
                 db._execute(q, params = args, many = isMany, retrieval = False)
             db.commitTransaction()
             self._writeState(jDict["timestamp"])
             if verbose:
                 self._reportRerunState()
             # transaction-final operations.
             self._doTransactionOperations(jDict)
     finally:
         self.ws.closeDB()
         logFp.close()