def test_get_document(self):
    # Store two documents, then read each one back by id and compare the
    # content with what was stored.
    samples = (
        ('000000000', 'this is file 000000000'),
        ('000000002', 'this is file 000000002'),
    )
    _add_documents(list(samples))

    for docid, content in samples:
        stored = docarchive.get_document(docid)
        self.assertEqual(stored.read(), content)
def main(argv):
    """Command-line entry point: strip tags from, or render, one document.

    argv[1] is the option: '-s' hands the document to stripTags() and
    '-r' hands it to render(); both write to sys.stdout.
    argv[2] is either the path of an existing file or a document id.  An
    id is left-padded with zeros and its last 9 characters are looked up
    with docarchive.get_document().

    Prints the module docstring and exits with status -1 when fewer than
    two arguments are given or the option is not recognised.
    """
    if len(argv) < 3:
        print(__doc__)
        sys.exit(-1)

    option = argv[1]
    path_or_id = argv[2]

    # Reject an unknown option before any document is opened, so a bad
    # command line always yields the usage text instead of doing I/O (or
    # failing on an archive lookup) first.
    if option not in ('-s', '-r'):
        print(__doc__)
        sys.exit(-1)

    fp = None
    try:
        if os.path.exists(path_or_id):
            fp = open(path_or_id, 'rb')
        else:
            # Normalise the id to 9 characters: zero-pad, keep the tail.
            path_or_id = ('000000000' + path_or_id)[-9:]
            fp = docarchive.get_document(path_or_id)

        if option == '-s':
            stripTags(fp, sys.stdout)
        else:
            render(fp, sys.stdout)
    finally:
        # fp stays None when opening the document itself failed.
        if fp is not None:
            fp.close()
def highlight(self, analyzer, highlighter):
    # Fetch the archived document for self.docid; a failure to fetch it is
    # logged rather than propagated.
    # NOTE(review): the visible body stops after the except clause, and
    # analyzer, highlighter, maxNumFragmentsRequired and fp are not used
    # in it -- the rest of the method is presumably outside this view;
    # confirm before changing anything here.
    maxNumFragmentsRequired = 2
    try:
        fp = docarchive.get_document(self.docid)
    except Exception, e:
        # The index may be out of date and refer to a document that no
        # longer exists.
        log.exception('Unable to get "%s"' % self.docid)
def test_add_document(self):
    # Add three documents through one ArchiveHandler and check which zip
    # archive the handler has open after each step.
    handler = docarchive.ArchiveHandler('w')

    # add files to archive
    handler.add_document('000000000', StringIO.StringIO('this is doc 000000000'))
    first_zfile = handler.zfile
    first_arc_path = handler.arc_path
    handler.add_document('000000001', StringIO.StringIO('this is doc 000000001'))

    # 000000.zip must still be the open archive
    self.assert_(first_zfile == handler.zfile)
    self.assert_(first_arc_path == handler.arc_path)

    handler.add_document('000001001', StringIO.StringIO('this is doc 000001001'))

    # the handler must have switched away from 000000.zip
    self.assert_(first_zfile != handler.zfile)
    self.assert_(first_arc_path != handler.arc_path)

    handler.close()

    # both zip files must now exist on disk
    for zip_name in ('000000.zip', '000001.zip'):
        self.assert_(os.path.exists(os.path.join(self.apath, zip_name)))

    # check content
    stored = docarchive.get_document('000000001')
    self.assertEqual(stored.read(), 'this is doc 000000001')
def main(argv):
    """Command-line entry point: strip tags from, or render, one document.

    argv[1] is the option: '-s' hands the document to stripTags() and
    '-r' hands it to render(); both write to sys.stdout.
    argv[2] is either the path of an existing file or a document id that
    is looked up with docarchive.get_document().

    Prints the module docstring and exits with status -1 when fewer than
    two arguments are given or the option is not recognised.
    """
    if len(argv) < 3:
        print(__doc__)
        sys.exit(-1)

    option = argv[1]
    path_or_id = argv[2]

    # Reject an unknown option up front: a bad command line should only
    # print the usage text, without initialising the proxy configuration
    # or opening any document.
    if option not in ('-s', '-r'):
        print(__doc__)
        sys.exit(-1)

    from minds import proxy
    proxy.init(proxy.CONFIG_FILENAME)   # config to read the actual archive

    fp = None
    try:
        if os.path.exists(path_or_id):
            fp = open(path_or_id, 'rb')
        else:
            fp = docarchive.get_document(path_or_id)

        if option == '-s':
            stripTags(fp, sys.stdout)
        else:
            render(fp, sys.stdout)
    finally:
        # fp stays None when opening the document itself failed.
        if fp is not None:
            fp.close()
def main(rfile, wfile, env):
    """CGI entry point: render the archived document named by 'docid'.

    rfile and env are passed to cgi.FieldStorage to parse the request.
    The response header and then the rendered document are written to
    wfile.
    """
    form = cgi.FieldStorage(fp=rfile, environ=env)
    docid = form.getvalue('docid', '')
    if len(docid) != 9:
        pass    # todo: 404

    # Header lines end in CRLF; the empty line terminates the header.
    wfile.write(
        'Content-type: text/html; charset=UTF-8\r\n'
        'Cache-control: no-cache\r\n'
        '\r\n'
    )

    fp = docarchive.get_document(docid)     # todo: except 404
    try:
        distillparse.render(fp, wfile)
    finally:
        # Release the document even when rendering fails.
        fp.close()
def _check_archive_doc(self, docid, *signatures):
    """Assert that document docid can be fetched and contains every signature.

    Only the first 1024 units returned by read() are searched.
    """
    # Fetching the document doubles as the existence check: the test
    # relies on this call not raising.
    doc = docarchive.get_document(docid)
    head = doc.read(1024)

    for signature in signatures:
        position = head.find(signature)
        # The signature itself is the failure message.
        self.assert_(position >= 0, signature)