示例#1
0
  def single_get(self):
    """Render the document list page along with a "create document" form.

    Fetches every document via ``scan_data.get_documents(db)`` and writes an
    HTML page that lists each one as a link to ``/document/<key>`` followed by
    the label returned from ``self._pluralify`` for its page count.  Below the
    list is a form that POSTs ``name`` and ``tags`` to ``/documents``.

    Document keys and names are HTML-escaped before being placed in the
    markup, so a name containing ``<``, ``&`` or ``"`` can neither break the
    page nor inject markup.
    """
    # Function-scope import: xml.sax.saxutils exists on both Python 2 and 3,
    # so this does not depend on the module's import block.
    from xml.sax.saxutils import escape

    def html_text(value):
      # "%s" coerces the value exactly as the original format string did;
      # the extra entity also makes the result safe inside a "..." attribute.
      return escape("%s" % (value,), {'"': "&quot;"})

    docs = scan_data.get_documents(db)

    items = []
    for doc in docs:
      pages_str = self._pluralify(len(doc.pages()), "page", "pages")
      items.append("<li><a href=\"/document/%s\">%s</a> %s</li>" % (html_text(doc.key()), html_text(doc.name), pages_str))
    doc_html = "\t" + "\n\t".join(items)

    # Only the two values the template needs are exposed to it (previously
    # every local, including ``self``, was passed through ``locals()``).
    template_values = {"num_docs": len(docs), "doc_html": doc_html}

    self.set_header("Content-Type", "text/html")
    self.write("""
    <html><head><title>Document List</title></head>
    <body>
    <h1>%(num_docs)d Documents</h1>
    <ul>
    %(doc_html)s
    </ul>
    <hr />
    <h2>Create New Document</h2>
    <form method="post" action="/documents">
      Name: <input name="name" type="text"/><br/>
      Tags: <input name="tags" type="text"/><br/>
      <input type="submit" value="Create Document" />
    </form>
    </body>
    </html>""" % template_values)
示例#2
0
    def single_get(self):
        """Render the document list page along with a "create document" form.

        Fetches every document via ``scan_data.get_documents(db)`` and writes
        an HTML page that lists each one as a link to ``/document/<key>``
        followed by the label returned from ``self._pluralify`` for its page
        count.  Below the list is a form that POSTs ``name`` and ``tags`` to
        ``/documents``.

        Document keys and names are HTML-escaped before being placed in the
        markup, so a name containing ``<``, ``&`` or ``"`` can neither break
        the page nor inject markup.
        """
        # Function-scope import: xml.sax.saxutils exists on both Python 2 and
        # 3, so this does not depend on the module's import block.
        from xml.sax.saxutils import escape

        def html_text(value):
            # "%s" coerces the value exactly as the original format string
            # did; the extra entity also makes the result safe inside a
            # double-quoted attribute.
            return escape("%s" % (value,), {'"': "&quot;"})

        docs = scan_data.get_documents(db)

        items = []
        for doc in docs:
            pages_str = self._pluralify(len(doc.pages()), "page", "pages")
            items.append("<li><a href=\"/document/%s\">%s</a> %s</li>" %
                         (html_text(doc.key()), html_text(doc.name),
                          pages_str))
        doc_html = "\t" + "\n\t".join(items)

        # Only the two values the template needs are exposed to it
        # (previously every local, including ``self``, went via ``locals()``).
        template_values = {"num_docs": len(docs), "doc_html": doc_html}

        self.set_header("Content-Type", "text/html")
        self.write("""
    <html><head><title>Document List</title></head>
    <body>
    <h1>%(num_docs)d Documents</h1>
    <ul>
    %(doc_html)s
    </ul>
    <hr />
    <h2>Create New Document</h2>
    <form method="post" action="/documents">
      Name: <input name="name" type="text"/><br/>
      Tags: <input name="tags" type="text"/><br/>
      <input type="submit" value="Create Document" />
    </form>
    </body>
    </html>""" % template_values)
示例#3
0
def orphaned_files(db, dir):
  """Return the page keys that have a file in *dir* but belong to no document.

  Args:
    db: database handle, passed straight to ``scan_data.get_documents``.
    dir: directory to inspect; only its top level is scanned.

  Returns:
    A set of file base names (extension stripped) that start with ``"page-"``
    and do not equal the key of any page of any document.
  """
  # Keys of every page referenced by some document.
  doc_pks = set()
  for doc in scan_data.get_documents(db):
    doc_pks.update(page.key() for page in doc.pages())

  file_pks = set()
  for (base, dirs, files) in os.walk(dir):
    # Emptying ``dirs`` in place is what stops os.walk from descending.
    # The earlier ``del dirs[0:-1]`` left the last subdirectory in the list,
    # so the walk still recursed into it.
    del dirs[:]

    for fname in files:
      if fname.startswith("page-"):
        file_pks.add(os.path.splitext(fname)[0])

  return file_pks - doc_pks
示例#4
0
def orphaned_files(db, dir):
    """Return the page keys that have a file in *dir* but belong to no document.

    Args:
        db: database handle, passed straight to ``scan_data.get_documents``.
        dir: directory to inspect; only its top level is scanned.

    Returns:
        A set of file base names (extension stripped) that start with
        ``"page-"`` and do not equal the key of any page of any document.
    """
    # Keys of every page referenced by some document.
    doc_pks = set()
    for doc in scan_data.get_documents(db):
        doc_pks.update(page.key() for page in doc.pages())

    file_pks = set()
    for (base, dirs, files) in os.walk(dir):
        # Emptying ``dirs`` in place is what stops os.walk from descending.
        # The earlier ``del dirs[0:-1]`` left the last subdirectory in the
        # list, so the walk still recursed into it.
        del dirs[:]

        for fname in files:
            if fname.startswith("page-"):
                file_pks.add(os.path.splitext(fname)[0])

    return file_pks - doc_pks
示例#5
0
def main(args):
    """Copy the dbm-backed scan data into MongoDB and report stray page files.

    Steps, in order:
      1. Open the ``scan_data`` dbm database read-only and connect to MongoDB.
      2. Insert ``scan_data.doc2json(doc)`` for every document into
         ``scanserver.documents``, collecting the key of every page seen.
      3. Insert ``scan_data.page2json(page)`` for every collected page key
         into ``scanserver.pages``.
      4. Scan the top level of ``static`` for files whose base name starts
         with ``page-`` and print how many have no matching page key.

    Args:
        args: command-line arguments; not used by this function.
    """
    # Single-argument print(...) behaves the same as the Python 2 print
    # statement and as the Python 3 print function.
    db = dbm.open("scan_data", "r")
    try:
        print("loaded database")

        # NOTE(review): pymongo.Connection and Collection.insert are legacy
        # APIs (superseded by MongoClient / insert_one in newer PyMongo) --
        # confirm against the installed driver version before upgrading.
        mdb = pymongo.Connection()
        print("connected to mongo")

        docs = scan_data.get_documents(db)
        print("loaded %d documents" % len(docs))

        mdb_docs = mdb.scanserver.documents

        # go through each document and build a set of pagekeys
        pagekeys = set()
        for doc in docs:
            pagekeys.update(page.key() for page in doc.pages())
            mdb_docs.insert(scan_data.doc2json(doc))

        print("found %d pages" % len(pagekeys))

        mdb_pages = mdb.scanserver.pages

        for pk in pagekeys:
            page = scan_data.read_page(pk, db)
            mdb_pages.insert(scan_data.page2json(page))
    finally:
        # The dbm handle was previously never closed.
        db.close()

    dirpks = set()
    for (base, dirs, files) in os.walk("static", topdown=True):
        # Emptying ``dirs`` in place is what stops os.walk from descending.
        # The earlier ``del dirs[0:-1]`` left the last subdirectory in the
        # list, so the walk still recursed into it.
        del dirs[:]

        for fname in files:
            pk = os.path.splitext(fname)[0]
            if pk.startswith("page-"):
                dirpks.add(pk)

    notindb = dirpks - pagekeys
    print("found %d page keys on disk that aren't in the database" %
          len(notindb))
示例#6
0
def main(args):
  """Copy the dbm-backed scan data into MongoDB and report stray page files.

  Steps, in order:
    1. Open the ``scan_data`` dbm database read-only and connect to MongoDB.
    2. Insert ``scan_data.doc2json(doc)`` for every document into
       ``scanserver.documents``, collecting the key of every page seen.
    3. Insert ``scan_data.page2json(page)`` for every collected page key into
       ``scanserver.pages``.
    4. Scan the top level of ``static`` for files whose base name starts with
       ``page-`` and print how many have no matching page key.

  Args:
    args: command-line arguments; not used by this function.
  """
  # Single-argument print(...) behaves the same as the Python 2 print
  # statement and as the Python 3 print function.
  db = dbm.open("scan_data", "r")
  try:
    print("loaded database")

    # NOTE(review): pymongo.Connection and Collection.insert are legacy APIs
    # (superseded by MongoClient / insert_one in newer PyMongo) -- confirm
    # against the installed driver version before upgrading.
    mdb = pymongo.Connection()
    print("connected to mongo")

    docs = scan_data.get_documents(db)
    print("loaded %d documents" % len(docs))

    mdb_docs = mdb.scanserver.documents

    # go through each document and build a set of pagekeys
    pagekeys = set()
    for doc in docs:
      pagekeys.update(page.key() for page in doc.pages())
      mdb_docs.insert(scan_data.doc2json(doc))

    print("found %d pages" % len(pagekeys))

    mdb_pages = mdb.scanserver.pages

    for pk in pagekeys:
      page = scan_data.read_page(pk, db)
      mdb_pages.insert(scan_data.page2json(page))
  finally:
    # The dbm handle was previously never closed.
    db.close()

  dirpks = set()
  for (base, dirs, files) in os.walk("static", topdown=True):
    # Emptying ``dirs`` in place is what stops os.walk from descending.
    # The earlier ``del dirs[0:-1]`` left the last subdirectory in the list,
    # so the walk still recursed into it.
    del dirs[:]

    for fname in files:
      pk = os.path.splitext(fname)[0]
      if pk.startswith("page-"):
        dirpks.add(pk)

  notindb = dirpks - pagekeys
  print("found %d page keys on disk that aren't in the database" % len(notindb))