def single_get(self):
    """Write an HTML page listing every document, plus a creation form.

    Loads the documents via ``scan_data.get_documents(db)``, renders one
    list item per document (a link to ``/document/<key>``, the document
    name and its page count as formatted by ``self._pluralify``), and writes
    the page to the response with a ``text/html`` content type.

    Document names and keys are HTML-escaped before interpolation: names
    are supplied through the "Create New Document" form on this same page,
    so rendering them verbatim would allow markup/script injection.
    """
    # Local import keeps the block self-contained on both Python 3
    # (html.escape) and Python 2 (cgi.escape).
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    def _esc(value):
        # "%s" mirrors the stringification the original format string did,
        # so non-string values (e.g. None) render the same way as before.
        return escape("%s" % (value,), True)

    docs = scan_data.get_documents(db)

    items = []
    for doc in docs:
        pages_str = self._pluralify(len(doc.pages()), "page", "pages")
        items.append(
            "<li><a href=\"/document/%s\">%s</a> %s</li>"
            % (_esc(doc.key()), _esc(doc.name), pages_str)
        )
    doc_html = "\t" + "\n\t".join(items)

    self.set_header("Content-Type", "text/html")
    # An explicit mapping (rather than locals()) makes it clear which values
    # the template consumes.
    self.write("""
<html><head><title>Document List</title></head>
<body>
<h1>%(num_docs)d Documents</h1>
<ul>
%(doc_html)s
</ul>
<hr />
<h2>Create New Document</h2>
<form method="post" action="/documents">
Name: <input name="name" type="text"/><br/>
Tags: <input name="tags" type="text"/><br/>
<input type="submit" value="Create Document" />
</form>
</body>
</html>""" % {"num_docs": len(docs), "doc_html": doc_html})
def orphaned_files(db, dir):
    """Return the on-disk page keys that no document references.

    Args:
        db: database handle passed straight to ``scan_data.get_documents``.
        dir: directory whose immediate files are inspected.

    Returns:
        A set of file base names (extension stripped) that start with
        ``"page-"`` and are found directly in ``dir``, minus the keys of
        every page belonging to a document in ``db``.
    """
    doc_pks = set()
    for doc in scan_data.get_documents(db):
        doc_pks.update(page.key() for page in doc.pages())

    file_pks = set()
    for _base, dirs, files in os.walk(dir):
        # Prune in place so os.walk never descends into subdirectories.
        # ``del dirs[0:-1]`` left the last entry behind, so the walk still
        # recursed into one subdirectory at every level.
        del dirs[:]
        for fname in files:
            if fname.startswith("page-"):
                file_pks.add(os.path.splitext(fname)[0])

    return file_pks - doc_pks
def orphaned_files(db, dir):
    """Return the on-disk page keys that no document references.

    Args:
        db: database handle passed straight to ``scan_data.get_documents``.
        dir: directory whose immediate files are inspected.

    Returns:
        A set of file base names (extension stripped) that start with
        ``"page-"`` and are found directly in ``dir``, minus the keys of
        every page belonging to a document in ``db``.
    """
    doc_pks = set()
    for doc in scan_data.get_documents(db):
        doc_pks.update(page.key() for page in doc.pages())

    file_pks = set()
    for _base, dirs, files in os.walk(dir):
        # Prune in place so os.walk never descends into subdirectories.
        # ``del dirs[0:-1]`` left the last entry behind, so the walk still
        # recursed into one subdirectory at every level.
        del dirs[:]
        for fname in files:
            if fname.startswith("page-"):
                file_pks.add(os.path.splitext(fname)[0])

    return file_pks - doc_pks
def main(args):
    """Copy the dbm scan data into MongoDB and report unreferenced page files.

    Steps:
      1. Open the ``scan_data`` dbm database read-only.
      2. Insert every document into ``scanserver.documents`` and every page
         referenced by a document into ``scanserver.pages``.
      3. Count the ``page-*`` files directly under ``static`` whose key is
         not referenced by any document, and print that count.

    ``args`` is accepted for the entry-point signature but is not used.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    db = dbm.open("scan_data", "r")
    try:
        print("loaded database")
        mdb = pymongo.Connection()
        print("connected to mongo")

        docs = scan_data.get_documents(db)
        print("loaded %d documents" % len(docs))

        # Go through each document, store it, and collect its page keys.
        mdb_docs = mdb.scanserver.documents
        pagekeys = set()
        for doc in docs:
            docstr = scan_data.doc2json(doc)
            pagekeys.update(page.key() for page in doc.pages())
            mdb_docs.insert(docstr)
        print("found %d pages" % len(pagekeys))

        mdb_pages = mdb.scanserver.pages
        for pk in pagekeys:
            page = scan_data.read_page(pk, db)
            mdb_pages.insert(scan_data.page2json(page))
    finally:
        # The dbm handle is only needed while reading documents and pages.
        db.close()

    dirpks = set()
    for _base, dirs, files in os.walk("static", topdown=True):
        # Don't recurse any further: empty the list in place.
        # ``del dirs[0:-1]`` kept the last entry, so os.walk still descended
        # into one subdirectory per level.
        del dirs[:]
        for fname in files:
            pk = os.path.splitext(fname)[0]
            if pk.startswith("page-"):
                dirpks.add(pk)

    notindb = dirpks - pagekeys
    print("found %d page keys on disk that aren't in the database"
          % len(notindb))
def main(args):
    """Copy the dbm scan data into MongoDB and report unreferenced page files.

    Steps:
      1. Open the ``scan_data`` dbm database read-only.
      2. Insert every document into ``scanserver.documents`` and every page
         referenced by a document into ``scanserver.pages``.
      3. Count the ``page-*`` files directly under ``static`` whose key is
         not referenced by any document, and print that count.

    ``args`` is accepted for the entry-point signature but is not used.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    db = dbm.open("scan_data", "r")
    try:
        print("loaded database")
        mdb = pymongo.Connection()
        print("connected to mongo")

        docs = scan_data.get_documents(db)
        print("loaded %d documents" % len(docs))

        # Go through each document, store it, and collect its page keys.
        mdb_docs = mdb.scanserver.documents
        pagekeys = set()
        for doc in docs:
            docstr = scan_data.doc2json(doc)
            pagekeys.update(page.key() for page in doc.pages())
            mdb_docs.insert(docstr)
        print("found %d pages" % len(pagekeys))

        mdb_pages = mdb.scanserver.pages
        for pk in pagekeys:
            page = scan_data.read_page(pk, db)
            mdb_pages.insert(scan_data.page2json(page))
    finally:
        # The dbm handle is only needed while reading documents and pages.
        db.close()

    dirpks = set()
    for _base, dirs, files in os.walk("static", topdown=True):
        # Don't recurse any further: empty the list in place.
        # ``del dirs[0:-1]`` kept the last entry, so os.walk still descended
        # into one subdirectory per level.
        del dirs[:]
        for fname in files:
            pk = os.path.splitext(fname)[0]
            if pk.startswith("page-"):
                dirpks.add(pk)

    notindb = dirpks - pagekeys
    print("found %d page keys on disk that aren't in the database"
          % len(notindb))