def simple_query(query):
    """Search pages for *query* and return the hits serialized as JSON.

    Every hit yielded by ``Page.search`` is read positionally: item 0 is
    emitted under the ``book`` key and item 1 under the ``page`` key. The
    return value is the JSON text of the resulting list of objects.
    """
    hits = []
    for match in Page.search(query):
        hits.append({'book': match[0], 'page': match[1]})
    return json.dumps(hits)
#!/usr/bin/env python
# Command-line search: joins all arguments into one search term and prints
# the first few hits returned by Page.search.
import sys
from itertools import islice

import sql
from sql import Page

# Upper bound on the number of search hits printed.
MAX_RESULTS = 6

if __name__ == '__main__':
    # Every command-line argument becomes part of one space-separated term.
    searchterm = ' '.join(sys.argv[1:])
    # Single-argument call form prints identically on Python 2 and 3.
    print("Searching for %s" % searchterm)
    book_pages = Page.search(searchterm)
    # islice caps the output without a hand-maintained counter and stops
    # pulling results as soon as the limit is reached.
    for v in islice(book_pages, MAX_RESULTS):
        print('%s: %s,' % (v[0], v[1]))
def queue_worker():
    """Consume items from ``page_queue`` forever, handing each to ``Page.add``.

    This function never returns; it is meant to be run as the target of a
    background thread.
    """
    while True:
        # NOTE(review): presumably page_queue is a Queue.Queue, in which case
        # the positional True is the ``block`` flag -- confirm at definition.
        next_page = page_queue.get(True)
        Page.add(next_page)
import threading
import traceback

# Work queue shared between the main thread (producer) and the worker threads.
page_queue = Queue.Queue()


def getPageContents(page, pdf):
    """Return the extracted text of page index *page* from the reader *pdf*."""
    return pdf.getPage(page).extractText()


def queue_worker():
    """Consume pages from ``page_queue`` forever, storing each via ``Page.add``.

    Every dequeued item is acknowledged with ``task_done()`` so that
    ``page_queue.join()`` can tell when all queued pages have been handled.
    A failure in ``Page.add`` is reported on stderr and the worker keeps
    running; otherwise a dead worker pool would leave ``join()`` waiting
    forever on items nobody will process.
    """
    while True:
        page = page_queue.get(True)
        try:
            Page.add(page)
        except Exception:
            traceback.print_exc()
        finally:
            page_queue.task_done()


# Start the worker pool. The threads are daemons, so they never keep the
# interpreter alive on their own -- the producer must wait for the queue.
for i in range(4):
    t = threading.Thread(target=queue_worker)
    t.daemon = True
    t.start()

if __name__ == '__main__':
    # argv[1] is passed through as the first Page argument; argv[2] is the
    # path of the PDF to read. The file must stay open while pages are
    # extracted, so the whole loop lives inside the ``with`` block.
    with open(sys.argv[2], 'rb') as pdf_file:
        pdf = pyPdf.PdfFileReader(pdf_file)
        page_count = pdf.getNumPages()

        # Sql stuff
        # NOTE(review): the session object is never used below; the call is
        # kept in case constructing it has required side effects -- confirm.
        session = sql.Session()

        for i in range(page_count):
            p = Page(sys.argv[1], i, getPageContents(i, pdf))
            page_queue.put_nowait(p)

    # Block until the workers have acknowledged every queued page. Without
    # this the main thread exits immediately and the daemon workers are
    # killed with pages still waiting in the queue.
    page_queue.join()
#!/usr/bin/env python
# Searches stored pages for the terms given on the command line and prints
# at most RESULT_LIMIT of the hits.
import sys
from itertools import islice

import sql
from sql import Page

# Maximum number of hits written to stdout.
RESULT_LIMIT = 6

if __name__ == '__main__':
    # The search term is all command-line arguments joined by single spaces.
    searchterm = ' '.join(sys.argv[1:])
    # Parenthesized single-argument print behaves the same on Python 2 and 3.
    print("Searching for %s" % searchterm)
    book_pages = Page.search(searchterm)
    # Take only the leading RESULT_LIMIT hits; replaces the manual counter
    # and break of the earlier version.
    for v in islice(book_pages, RESULT_LIMIT):
        print('%s: %s,' % (v[0], v[1]))