def update_metadata (self, newdict, reindex=True):
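    """Update this document's metadata from NEWDICT and, if REINDEX is
    true, re-index the fields whose values actually changed, on a
    separate thread."""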
    lock_folder(self.__folder)
    if reindex:
        oldvals = self.get_metadata().copy()
    try:
        self.__metadata = p_update_metadata(self.metadata_path(), newdict)
        self.__date = None
        self.__category_strings = None
        self.__citation = None
    finally:
        unlock_folder(self.__folder)
    if reindex:
        d = newdict.copy()
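        # keep only the fields whose values actually changed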
        for k in list(d.keys()):
            if oldvals.get(k) == d.get(k):
                del d[k]
        newthread = uthread.start_new_thread(_reindex_document_folder,
                                             (self.repo, self.__folder, self.id, d.keys()))
        note(3, "reindexing %s in %s", self.id, str(newthread))
def _rerip_worker(response, docs, ripper, other_rippers):
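    """Run RIPPER over each document in DOCS, then cascade to the other
    rippers that should follow it (named in OTHER_RIPPERS or requested by
    the rippers themselves), reporting progress and errors as plain text
    on RESPONSE."""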
    fp = response.open("text/plain")
    try:
        for doc in docs:
            location = doc.folder()
            lock_folder(location)
            try:
                try:
                    ripper.rip(location, doc.id)
                except:
                    msg = ''.join(traceback.format_exception(*sys.exc_info()))
                    note("Error running %s ripper:\n%s", ripper.name(), msg)
                    fp.write("Error running %s ripper:\n%s" % (ripper.name(), msg))
                else:
                    fp.write("ripped %s with %s\n" % (doc.id, ripper.name()))
                    reruns = [ripper.name()]
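                    # cascade: re-run any repository ripper that was explicitly
                    # requested, or that asks to run after one we've just run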
                    for r in doc.repo.rippers():
                        if ((other_rippers and (r.name() in other_rippers)) or
                            any([r.rerun_after_other_ripper(x) for x in reruns])):
                            try:
                                r.rip(location, doc.id)
                            except:
                                msg = ''.join(traceback.format_exception(*sys.exc_info()))
                                note("Error running %s ripper:\n%s", r.name(), msg)
                                fp.write("Error running %s ripper:\n%s" % (r.name(), msg))
                            else:
                                reruns.append(r.name())
                                fp.write("ripped %s with %s\n" % (doc.id, r.name()))
                    for r in [x for x in (other_rippers or []) if isinstance(x, Ripper)]:
                        if r.name() not in reruns:
                            try:
                                r.rip(location, doc.id)
                            except:
                                msg = ''.join(traceback.format_exception(*sys.exc_info()))
                                note("Error running %s ripper:\n%s", r.name(), msg)
                                fp.write("Error running %s ripper:\n%s" % (r.name(), msg))
                            else:
                                reruns.append(r.name())
                                fp.write("ripped %s with %s\n" % (doc.id, r.name()))
            finally:
                unlock_folder(location)
    finally:
        fp.close()
def _run_rippers (folderpath, repo, id):
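    """Run each of the repository's rippers, in order, over the document
    folder at FOLDERPATH.  The name of the ripper currently running is
    kept in the folder's RIPPING file, which is removed on success."""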

    rippers = repo.rippers()
    lock_folder(folderpath)
    try:
        which_ripper = os.path.join(folderpath, "RIPPING")
        try:
            for ripper in rippers:
                note("%s:  running ripper %s", id, str(ripper))
                # record which ripper is running, so an interruption can be diagnosed
                fp = open(which_ripper, 'wb')
                fp.write(ripper.__class__.__name__)
                fp.close()
                ripper.rip(folderpath, id)

        except AbortDocumentIncorporation:
            # deliberate aborts of the incorporation propagate unchanged
            raise

        except:
            etype, value, tb = sys.exc_info()
            note("Running document rippers raised the following exception:\n%s",
                 ''.join(traceback.format_exception(etype, value, tb)))
            note("Fixing the problem and restarting the UpLib angel will cause this document to be added to the repository.")
            # re-raise the original exception with its traceback
            raise value, None, tb

        else:
            if os.path.exists(which_ripper):
                os.unlink(which_ripper)
    finally:
        unlock_folder(folderpath)
def _reindex_document_folder(repo, folder, doc_id, changed_fields):
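    """Re-run, on the document folder FOLDER, those of the repository's
    standard rippers that care about CHANGED_FIELDS, plus any ripper that
    asks to run again after one that was re-run."""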
    try:
        import createIndexEntry, createHTML
        lock_folder(folder)
        try:
            note(3, "re-running some rippers on %s...", doc_id)
            standard_rippers = repo.rippers()
            rerun = []
            for ripper in standard_rippers:
                try:
                    if (ripper.rerun_after_metadata_changes(changed_fields=changed_fields)
                        or any([ripper.rerun_after_other_ripper(x.name()) for x in rerun])):
                        note(4, "    re-running ripper %s on %s", ripper.name(), doc_id)
                        ripper.rip(folder, doc_id)
                        rerun.append(ripper)
                except:
                    note("Exception running %s on %s:\n%s", ripper, doc_id,
                         ''.join(traceback.format_exception(*sys.exc_info())))
        finally:
            unlock_folder(folder)
    except:
        etype, value, tb = sys.exc_info()
        note("while in _reindex_document_folder(%s):\n%s", doc_id,
             ''.join(traceback.format_exception(etype, value, tb)))
def process_folder (repo, id, directory, delete_p, replace=None):
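    """Copy the recognized parts of DIRECTORY (page images, originals,
    metadata, text, links) into the pending folder for document ID, or
    into the existing folder REPLACE if one is being replaced.  If
    DELETE_P is true, the source parts are removed after copying.
    Returns ID."""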

    def _protect_files (arg, dirname, files):
        # os.path.walk visitor: make each file and subdirectory owner-only
        for fname in files:
            thepath = os.path.join(dirname, fname)
            if os.path.isdir(thepath):
                os.chmod(thepath, 0700)
            else:
                os.chmod(thepath, 0600)

    note(2, "processing folder %s...", directory)

    description = None
    contents = None
    summary = None
    metadata = None
    wordbboxes = os.path.join(directory, "wordbboxes")
    tifffile = os.path.join(directory, "document.tiff")
    pageimagesdir = os.path.join(directory, "page-images")
    images = os.path.join(directory, "images")
    originals = os.path.join(directory, "originals")
    links = os.path.join(directory, "links")

    names = os.listdir(directory)
    for name in names:
        if name.lower() == "contents.txt":
            contents = os.path.join(directory, name)
        elif name.lower() == "summary.txt":
            summary = os.path.join(directory, name)
        elif name.lower() == "metadata.txt":
            metadata = os.path.join(directory, name)

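    # a replacement re-uses its existing document folder; otherwise the
    # parts are staged into the repository's pending area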
    if replace is None:
        newdir = os.path.join(repo.pending_folder(), id)
    else:
        newdir = replace
    if not os.path.isdir(newdir):
        raise Error("Pending directory %s does not exist!" % newdir)

    try:
        lock_folder(newdir)

        try:
            if os.path.exists(images):
                destpath = os.path.join(newdir, "images")
                if replace and os.path.exists(destpath): shutil.rmtree(destpath)
                shutil.copytree(images, destpath)
                if delete_p: shutil.rmtree(images, True)
            if os.path.exists(originals):
                destpath = os.path.join(newdir, "originals")
                if replace and os.path.exists(destpath): shutil.rmtree(destpath)
                shutil.copytree(originals, destpath)
                if delete_p: shutil.rmtree(originals, True)
            if os.path.exists(links):
                destpath = os.path.join(newdir, "links")
                if replace and os.path.exists(destpath): shutil.rmtree(destpath)
                shutil.copytree(links, destpath)
                if delete_p: shutil.rmtree(links, True)
            if metadata:
                destpath = os.path.join(newdir, "metadata.txt")
                if replace and os.path.exists(destpath): os.unlink(destpath)
                shutil.copyfile(metadata, destpath)
                m = read_metadata(metadata)
                if m.has_key("title"):
                    note("Title of uploaded folder is '%s'", m['title'])
                if delete_p: os.unlink(metadata)
            else:
                # create an empty metadata.txt
                destpath = os.path.join(newdir, "metadata.txt")
                if replace and os.path.exists(destpath): os.unlink(destpath)
                open(destpath, 'w').close()

            newcontents = os.path.join(newdir, "contents.txt")
            if contents:
                if replace and os.path.exists(newcontents): os.unlink(newcontents)
                shutil.copyfile(contents, newcontents)
                if delete_p: os.unlink(contents)

            newsummary = os.path.join(newdir, "summary.txt")
            if summary:
                if replace and os.path.exists(newsummary): os.unlink(newsummary)
                shutil.copyfile(summary, newsummary)
                if delete_p: os.unlink(summary)

            if os.path.exists(wordbboxes):
                destpath = os.path.join(newdir, "wordbboxes")
                if replace and os.path.exists(destpath): os.unlink(destpath)
                shutil.copyfile(wordbboxes, destpath)
                if delete_p: os.unlink(wordbboxes)

            if os.path.exists(tifffile):
                destpath = os.path.join(newdir, "document.tiff")
                if replace and os.path.exists(destpath): os.unlink(destpath)
                shutil.copyfile(tifffile, destpath)
                if delete_p: os.unlink(tifffile)
            elif os.path.isdir(pageimagesdir):
                destpath = os.path.join(newdir, "page-images")
                if replace and os.path.exists(destpath): shutil.rmtree(destpath)
                shutil.copytree(pageimagesdir, destpath)
                if delete_p: shutil.rmtree(pageimagesdir, True)

            # make the whole folder tree private to its owner
            os.path.walk(newdir, _protect_files, None)
            os.chmod(newdir, 0700)

            return id

        finally:
            unlock_folder (newdir)

    except:
        etype, value, tb = sys.exc_info()
        if os.path.exists(newdir) and not replace:
            shutil.rmtree(newdir)
        # re-raise the exception
        raise value, None, tb
def index_folders (docs_dir, doc_ids, repo_index_dir):
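    """Add the document folders named by DOC_IDS (under DOCS_DIR) to the
    Lucene index at REPO_INDEX_DIR, either in-process via JCC or by
    running the Java indexer as a subprocess."""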

    update_configuration()

    if not doc_ids:
        return

    if LUCENE == 'jcc':

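        # index in-process through the JCC-based Lucene context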
        c = get_context(repo_index_dir)
        SECTION_LOCK.acquire()
        try:
            for id in doc_ids:
                folderpath = os.path.join(docs_dir, id)
                if os.path.isdir(folderpath):
                    lock_folder(folderpath)
                    try:
                        try:
                            c.index(folderpath, id, False)
                        except:
                            note(0, "Can't index folder %s:\n%s",
                                 folderpath, ''.join(traceback.format_exception(*sys.exc_info())))
                    finally:
                        unlock_folder(folderpath)
            c.reopen()
        finally:
            SECTION_LOCK.release()
        return

    else:

        # invoke Java to do indexing

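        # with a big batch, pass the ids in a temporary file
        # to keep the command line short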
        if len(doc_ids) > 6:

            fd, fname = tempfile.mkstemp()
            fp = os.fdopen(fd, "w")
            fp.write('\n'.join(doc_ids))
            fp.close()
            indexingcmd = INDEXING_BATCHADD_CMD % (JAVA, DEBUG_FLAGS, INDEXING_PROPERTIES, LUCENE_JAR, INDEXING_JAR, repo_index_dir, docs_dir, fname)
            note(3, "  indexing with %s", indexingcmd)
            SECTION_LOCK.acquire()
            try:
                status, output, tsignal = subproc(indexingcmd)
            finally:
                SECTION_LOCK.release()
                os.unlink(fname)
            note(3, "  indexing output is <%s>", output)
            if status != 0:
                raise Error ("%s signals non-zero exit status %d attempting to index %s:\n%s" % (JAVA, status, doc_ids, output))

        else:

            folders = ' '.join(doc_ids)
            indexingcmd = INDEXING_ADD_CMD % (JAVA, DEBUG_FLAGS, INDEXING_PROPERTIES, LUCENE_JAR, INDEXING_JAR, repo_index_dir, docs_dir, folders)
            note(3, "  indexing with %s", indexingcmd)
            SECTION_LOCK.acquire()
            try:
                status, output, tsignal = subproc(indexingcmd)
            finally:
                SECTION_LOCK.release()
            note(3, "  indexing output is <%s>", output)
            if status != 0:
                raise Error ("%s signals non-zero exit status %d attempting to index %s:\n%s" % (JAVA, status, doc_ids, output))