Пример #1
0
def script_for_document(scriptjson, project, pid):
    """
    Build a per-document version of a batch script.

    Parses `scriptjson`, checks it with `validate_batch_script`, replaces
    the first input-stage node with a `storage.DocImageFileIn` node bound
    to document `pid` of `project`, and adds two `storage.DocWriter`
    nodes: one fed by the first recognize-stage node (attribute
    "transcript") and one fed by that node's first input (attribute
    "binary").  Returns the modified script serialized as indented JSON.
    """
    document = project.get_storage().get(pid)
    tree = script.Script(json.loads(scriptjson))
    validate_batch_script(tree)

    def doc_params(*extra):
        # a fresh list per node, so no two nodes share a params object
        return [("project", project.pk), ("pid", pid)] + list(extra)

    # writer labels derived from the image label:
    # "<stem>.bin<ext>" for the binary, "<stem>.html" for the transcript
    stem, ext = os.path.splitext(document.image_label)
    binary_label = stem + ".bin" + ext
    transcript_label = stem + ".html"

    old_source = tree.get_nodes_by_attr("stage", stages.INPUT)[0]
    recognizer = tree.get_nodes_by_attr("stage", stages.RECOGNIZE)[0]
    # assume the binary is the first input to the recogniser
    binary_node = recognizer.input(0)

    new_source = tree.new_node(
        "storage.DocImageFileIn",
        document.image_label,
        params=doc_params())
    transcript_writer = tree.add_node(
        "storage.DocWriter",
        transcript_label,
        params=doc_params(("attribute", "transcript")))
    binary_writer = tree.add_node(
        "storage.DocWriter",
        binary_label,
        params=doc_params(("attribute", "binary")))

    # swap the original input for the document-backed one, then wire
    # the two writers to their sources
    tree.replace_node(old_source, new_source)
    transcript_writer.set_input(0, recognizer)
    binary_writer.set_input(0, binary_node)
    return json.dumps(tree.serialize(), indent=2)
Пример #2
0
def run_preset(request):
    """
    Execute a script (sent as JSON) as an asynchronous task.

    Reads the POST fields "script" (the JSON node list) and "node" (the
    name of the node to evaluate, defaulting to "").  Responds with JSON:

    - status "VALIDATION" plus the errors if `tree.validate()` reports
      any;
    - status "NOSCRIPT" if the named node is not found and the script
      has no terminal nodes;
    - otherwise the node name and the submitted task's id, status and
      result.
    """
    evalnode = request.POST.get("node", "")
    nodes = json.loads(request.POST.get("script"))
    tree = script.Script(nodes)
    errors = tree.validate()
    if errors:
        return HttpResponse(json.dumps(dict(
            status="VALIDATION",
            errors=errors,
        )), mimetype="application/json")

    # Only check that there is something to evaluate: the node *name*
    # (not a resolved node object) is what gets handed to the task.
    if tree.get_node(evalnode) is None and not tree.get_terminals():
        return HttpResponse(json.dumps(dict(
            status="NOSCRIPT",
        )), mimetype="application/json")

    # `async` is a reserved word from Python 3.7 onwards, so the task
    # handle uses a different local name.
    task = OcrTask.run_celery_task("run.script", (evalnode, nodes,
            request.output_path, _cache_name(request)),
            untracked=True, asyncronous=True, queue="interactive")
    out = dict(
        node=evalnode,
        task_id=task.task_id,
        status=task.status,
        results=task.result
    )
    # serialize straight into the response object
    response = HttpResponse(mimetype="application/json")
    json.dump(out, response, ensure_ascii=False)
    return response
Пример #3
0
def run(nodelist, outpath):
    """
    Evaluate a script through a `util.FileOut` node.

    Builds a script from `nodelist`, attaches a `util.FileOut` node
    (whose "path" parameter is the absolute form of `outpath`) to the
    script's first terminal node, and evaluates that new node.

    Raises IndexError if the script has no terminal nodes.
    """
    s = script.Script(nodelist)
    term = s.get_terminals()[0]
    # A parenthesised single argument prints identically with the
    # Python 2 print statement and the Python 3 print function.
    print("Rendering to %s" % outpath)
    # set before the FileOut node is evaluated
    os.environ["NODETREE_WRITE_FILEOUT"] = "1"
    out = s.add_node("util.FileOut", "Output",
            params=[("path", os.path.abspath(outpath))])
    out.set_input(0, term)
    out.eval()
Пример #4
0
 def validate_preset(self, data):
     """
     Check a script against the predicates stored on this object.

     `self.data` is parsed as JSON into a mapping of name -> list of
     predicates; `data` is turned into a script tree.  Each predicate is
     passed to `self.validate_predicate` and all errors it reports are
     collected.  Returns the (possibly empty) list of errors.
     """
     predicates = json.loads(self.data)
     tree = script.Script(data)
     collected = []
     for name, preds in predicates.iteritems():
         for pred in preds:
             # a falsy result (no errors) contributes nothing
             collected.extend(
                 self.validate_predicate(name, pred, tree) or [])
     return collected
Пример #5
0
 def test_invalid_scripts(self):
     """
     Scripts whose name starts with "invalid" must raise a
     ValidationError from every terminal node when evaluated.
     """
     for name, nodes in self.invalidscripts.iteritems():
         if name.startswith("invalid"):
             terminals = script.Script(nodes).get_terminals()
             self.assertTrue(len(terminals) > 0,
                             msg="No terminal nodes found.")
             # evaluating any terminal of an invalid script must fail
             for terminal in terminals:
                 self.assertRaises(exceptions.ValidationError,
                                   terminal.eval)
Пример #6
0
def clear_node_cache(request):
    """
    Clear the cache for a single node of the posted script.

    Reads the POST fields "node" (node name) and "script" (JSON node
    list), looks the node up in the script and passes it to the
    `clear_cache` method of a cacher keyed by `_cache_name(request)`.
    Always responds with the JSON object {"ok": true}.
    """
    post = request.POST
    node_name = post.get("node")
    tree = script.Script(json.loads(post.get("script")))
    target = tree.get_node(node_name)

    # build the cacher under MEDIA_ROOT/TEMP_PATH
    cacher_cls = pluginutils.get_dzi_cacher(settings)
    cache_root = os.path.join(settings.MEDIA_ROOT, settings.TEMP_PATH)
    cacher = cacher_cls(path=cache_root, key=_cache_name(request))
    cacher.clear_cache(target)

    body = json.dumps({"ok": True})
    return HttpResponse(body, mimetype="application/json")
Пример #7
0
    def run(self, project_pk, pid, scriptjson):
        """
        Run a batch script against a single stored document.

        Looks up project `project_pk` and its document `pid`, removes any
        existing binary DZI file and its "_files" directory, stores the
        script on the document, marks the document as running and
        evaluates every terminal node of the script.  Any exception
        raised while doing so is logged and the document's status is set
        to `status.ERROR`.
        """
        project = Project.objects.get(pk=project_pk)
        storage = project.get_storage()
        doc = storage.get(pid)
        logger = self.get_logger()
        # the format string needs an argument, otherwise a literal "%s"
        # ends up in the log
        logger.debug("Running Document Batch Item: %s", pid)

        # try and delete the existing binary dzi file and its "_files"
        # directory.  Each removal is attempted on its own so that a
        # missing .dzi file does not leave the directory behind.
        dzipath = storage.document_attr_dzi_path(doc, "binary")
        dzifiles = os.path.splitext(dzipath)[0] + "_files"
        for remove, target in ((os.unlink, dzipath),
                               (shutil.rmtree, dzifiles)):
            try:
                remove(target)
            except OSError:
                # best effort: the target is absent or cannot be removed
                pass

        progress_handler = get_progress_callback(self.request.id)
        abort_handler = get_abort_callback(self.request.id)
        progress_handler(0)

        tree = script.Script(json.loads(scriptjson),
                             nodekwargs=dict(logger=logger,
                                             abort_func=abort_handler,
                                             progress_func=progress_handler))
        logger.debug("Running tree: %s", json.dumps(tree.serialize(),
                                                    indent=2))
        try:
            # write out the binary... this should cache its input
            os.environ["NODETREE_WRITE_FILEOUT"] = "1"
            doc.script_content = json.dumps(tree.serialize(), indent=2)
            doc.script_label = "%s.json" % os.path.splitext(doc.label)[0]
            doc.script_mimetype = "application/json"
            # mark the document as being processed
            doc.ocr_status = status.RUNNING
            doc.save()

            # process the nodes
            for terminal in tree.get_terminals():
                terminal.eval()

        except Exception as err:
            logger.exception("Unhandled exception: %s", err)
            # flag the document as failed
            # NOTE(review): this status change is not saved in the code
            # visible here -- confirm that it is persisted afterwards.
            doc.ocr_status = status.ERROR
Пример #8
0
    def test_valid_scripts(self):
        """
        Scripts whose name does not start with "invalid" must evaluate
        without raising, and every terminal node must produce a value
        of one of the accepted types.
        """
        for name, nodes in self.validscripts.iteritems():
            if not name.startswith("invalid"):
                terminals = script.Script(nodes).get_terminals()
                self.assertTrue(len(terminals) > 0,
                                msg="No terminal nodes found.")

                # each terminal must evaluate to a known output type
                for terminal in terminals:
                    result_type = type(terminal.eval())
                    self.assertIn(
                        result_type,
                        (unicode, dict, list, numpy.ndarray),
                        msg="Unexpected output type for node %s: %s" %
                        (terminal.name, result_type))
Пример #9
0
def script_for_page_file(scriptjson, filepath, writepath):
    """
    Build a per-file version of a batch script.

    Parses `scriptjson`, checks it with `validate_batch_script`, sets
    the "path" parameter of the first input-stage node to `filepath`,
    and adds a `util.FileOut` node named "OutputBinary" that is fed by
    the first input of the first recognize-stage node.  Returns the
    modified script serialized as indented JSON.
    """
    tree = script.Script(json.loads(scriptjson))
    validate_batch_script(tree)

    # point the script's input node at the file to process
    source = tree.get_nodes_by_attr("stage", stages.INPUT)[0]
    source.set_param("path", filepath)

    # hang a file-out node on the recognizer's binary input so that it
    # gets saved alongside the recognition result
    recognizer = tree.get_nodes_by_attr("stage", stages.RECOGNIZE)[0]
    binary_path = os.path.abspath(
        ocrutils.get_binary_path(filepath, writepath))
    writer_params = [
        ("path", binary_path.encode()),
        ("create_dir", True),
    ]
    binary_writer = tree.add_node(
        "util.FileOut", "OutputBinary", params=writer_params)
    binary_writer.set_input(0, recognizer.input(0))
    return json.dumps(tree.serialize(), indent=2)
Пример #10
0
 def run(self, evalnode, nodelist, writepath, cachedir):
     """
     Evaluate one node of a script with a cacher attached.

     Builds a cacher (class obtained from `pluginutils.get_dzi_cacher`)
     rooted at MEDIA_ROOT/TEMP_PATH and keyed by `cachedir`, builds the
     script from `nodelist` and evaluates the node named `evalnode`,
     falling back to the first terminal node when that name is not
     found.

     On a `NodeError` the error is logged and a dict with the keys
     "type", "node" and "error" is returned.  `writepath` is not used
     in the code visible here.
     """
     logger = self.get_logger()
     cacheclass = pluginutils.get_dzi_cacher(settings)
     cacher = cacheclass(path=os.path.join(settings.MEDIA_ROOT,
                                           settings.TEMP_PATH),
                         key=cachedir,
                         logger=logger)
     logger.debug("Using cacher: %s, Bases %s", cacher,
                  cacheclass.__bases__)
     try:
         tree = script.Script(nodelist,
                              nodekwargs=dict(logger=logger, cacher=cacher))
         term = tree.get_node(evalnode)
         if term is None:
             term = tree.get_terminals()[0]
         # NOTE(review): `result` is not returned in the code visible
         # here, so the success path yields None -- confirm whether a
         # return step follows.
         result = term.eval()
     except exceptions.NodeError as err:
         logger.error("Node Error (%s): %s", err.node, err.message)
         return dict(type="error", node=err.node.label, error=err.message)
Пример #11
0
    def handle(self, *args, **options):
        """
        Run a script file on one input file through a `util.FileOut` node.

        Expects exactly three positional arguments: the script file, the
        input file and the output file.  The script's first input-stage
        node gets `infile` as its "path" parameter, a `util.FileOut`
        node pointing at the absolute `outfile` path is attached to the
        first terminal node, and that new node is evaluated.

        Raises CommandError for a wrong argument count, an unreadable or
        unparsable script file, or a script file containing JSON null.
        """
        if len(args) != 3:
            raise CommandError("Usage: %s" % self.help)
        scriptfile, infile, outfile = args

        try:
            with open(scriptfile, "r") as handle:
                nodes = json.loads(handle.read())
        except Exception:
            raise CommandError("Invalid script file: %s" % scriptfile)
        if nodes is None:
            raise CommandError("No nodes found in script: %s" % scriptfile)

        tree = script.Script(nodes)
        # feed the given input file into the script
        source = tree.get_nodes_by_attr("stage", stages.INPUT)[0]
        source.set_param("path", infile)

        terminal = tree.get_terminals()[0]
        sys.stderr.write("Rendering to %s\n" % outfile)
        os.environ["NODETREE_WRITE_FILEOUT"] = "1"
        target = os.path.abspath(outfile)
        writer = tree.add_node("util.FileOut", "Output",
                               params=[("path", target)])
        writer.set_input(0, terminal)
        writer.eval()
Пример #12
0
 def run_preset(cls, preset, handle):
     """
     Evaluate a preset's script against `handle`.

     The preset's JSON data is turned into a script, passed through
     `cls._set_script_input` together with `handle`, and the value of
     evaluating the resulting script's first terminal node is returned.
     """
     tree = script.Script(json.loads(preset.data))
     tree = cls._set_script_input(tree, handle)
     return tree.get_terminals()[0].eval()