def __init__(self, store):
    """Open a cursor on *store* and pre-load the first item.

    A fresh Session is created, the store is opened with the
    'byteCount' argument, and ``self.nextData`` is primed with the
    first key together with the data fetched for that key.
    """
    self.store = store
    self.session = Session()
    self.cxn = store._open(self.session, 'byteCount')
    self.cursor = self.cxn.cursor()
    # Only the key of the first cursor entry is needed; its value is
    # discarded and the data is fetched through the store instead.
    first_key, _unused_value = self.cursor.first()
    first_data = self.store.fetch_data(self.session, first_key)
    self.nextData = (first_key, first_data)
def process_update(self, req):
    """Dispatch an update request to the handler for its operation.

    Sets ``self.version`` and ``self.operationStatus`` first, then
    builds a Session for the database that owns the request's config
    and calls the matching ``handle_*`` method. An unrecognised
    operation is recorded in ``self.diagnostics`` rather than raised.

    Raises:
        Diagnostic7: if ``req.version`` is missing/falsy.
    """
    self.version = "1.1"
    self.operationStatus = "fail"

    if not req.version:
        missing = Diagnostic7()
        missing.message = "Mandatory 'version' parameter not supplied"
        missing.details = 'version'
        raise missing

    database = req.config.parent
    req._db = database

    session = Session()
    session.environment = "apache"
    session.database = database.id

    # Operation URI -> name of the method that services it. Handlers are
    # looked up by name only once an operation has matched.
    handler_names = (
        ("info:srw/operation/1/create", 'handle_create'),
        ("info:srw/operation/1/replace", 'handle_replace'),
        ("info:srw/operation/1/delete", 'handle_delete'),
        ("info:srw/operation/1/metadata", 'handle_metadata'),
    )
    for operation_uri, handler_name in handler_names:
        if req.operation == operation_uri:
            getattr(self, handler_name)(session, req)
            return

    # Barf: nothing matched, report the unknown operation
    unknown = SRWDiagnostics.Diagnostic1()
    unknown.details = "Unknown operation: %s" % req.operation
    self.diagnostics = [unknown]
def __init__(self):
    """Create a session on 'db_dickens' and look up its query factory."""
    session = Session()
    self.session = session
    session.database = 'db_dickens'

    server_config = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    self.serv = SimpleServer(session, server_config)

    # The database and its default query factory are resolved through
    # the server using the same session.
    self.db = self.serv.get_object(session, session.database)
    self.qf = self.db.get_object(session, 'defaultQueryFactory')
def test_sessionDatabaseAssign(self):
    # Assign session.database, then re-assign it, checking the stored
    # value after each step.
    sess = Session()
    steps = (
        ("db_test1", "session.database assignment failed"),
        ("db_test2", "session.database re-assignment failed"),
    )
    for value, failure_message in steps:
        sess.database = value
        self.assertEqual(sess.database, value, failure_message)
def test_sessionEnvironmentAssign(self):
    # Assign session.environment, then re-assign it, checking the stored
    # value after each step.
    sess = Session()
    steps = (
        ("apache", "session.environment assignment failed"),
        ("terminal", "session.environment re-assignment failed"),
    )
    for value, failure_message in steps:
        sess.environment = value
        self.assertEqual(sess.environment, value, failure_message)
def setUp(self):
    """Build one record per test datum, kept alongside its source XML."""
    self.session = Session()
    self.records = []
    record_class = self._get_class()
    for xml in self._get_data():
        # Each entry pairs the raw XML with the record built from the
        # parsed form of that same XML.
        record = record_class(self._parse_data(xml),
                              xml=xml,
                              byteCount=len(xml))
        self.records.append({'xml': xml, 'record': record})
def __init__(self):
    '''
    Open a Cheshire3 session on 'db_dickens' and cache the objects
    used later: the database, its default query factory, the result
    set store and the index store.
    '''
    self.session = Session()
    self.session.database = 'db_dickens'

    config_file = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    self.serv = SimpleServer(self.session, config_file)
    self.db = self.serv.get_object(self.session, self.session.database)

    # Everything below is fetched from the database by identifier.
    fetch = self.db.get_object
    self.qf = fetch(self.session, 'defaultQueryFactory')
    self.resultSetStore = fetch(self.session, 'resultSetStore')
    self.idxStore = fetch(self.session, 'indexStore')
def setUp(self):
    """Start a server, register dependency configs and build the test object."""
    session = Session()
    self.session = session
    config_path = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    self.server = SimpleServer(session, config_path)

    # Register each dependency config under its 'id' attribute
    for dependency in self._get_dependencyConfigs():
        dependency_id = dependency.get('id')
        self.server.subConfigs[dependency_id] = dependency

    # Disable stdout logging
    default_logger = self.server.get_path(session, 'defaultLogger')
    default_logger.minLevel = 60

    # Create object that will be tested
    object_config = self._get_config()
    self.testObj = makeObjectFromDom(session, object_config, self.server)
def setUp(self):
    """Create one StringDocument per (mimeType, data, history) triple."""
    self.session = Session()
    self.testPairs = [
        ('application/xml',
         '<doc><foo/><bar><baz/></baz></doc>',
         []),
        ('text/plain',
         'This is my document!',
         ['aProcessingObject']),
    ]
    self.testDocs = []
    for mime_type, text, history in self.testPairs:
        # Counts are derived from the raw text: bytes via len(),
        # words by splitting on single spaces.
        document = StringDocument(text,
                                  mimeType=mime_type,
                                  creator=id(self),
                                  history=history,
                                  byteCount=len(text),
                                  wordCount=len(text.split(' ')))
        self.testDocs.append(document)
def __init__(self):
    '''
    Set up a cheshire3 session/connection to the database.

    This initialisation does not handle the actual search term
    (cf. build_and_run_query).
    '''
    session = Session()
    session.database = 'db_dickens'
    self.session = session

    self.serv = SimpleServer(
        session,
        os.path.join(cheshire3Root, 'configs', 'serverConfig.xml'))
    database = self.serv.get_object(session, session.database)
    self.db = database

    # Objects looked up once here and kept on the instance
    self.qf = database.get_object(session, 'defaultQueryFactory')
    self.resultSetStore = database.get_object(session, 'resultSetStore')
    self.idxStore = database.get_object(session, 'indexStore')
def setUp(self):
    """Build four ResultSetItems and wrap them in two ResultSets.

    N.B. a == b, other pairs should not evaluate as equal
    """
    self.session = session = Session()
    # Keyword arguments that are the same for every item
    shared = dict(database="", diagnostic=None, weight=0.5,
                  resultSet=None, numeric=None)
    # Set up same 4 ResultSetItems as for SimpleResultSetItemTestCase
    self.rsi1 = SimpleResultSetItem(session, id=0, recStore="recordStore",
                                    occs=5, **shared)
    self.rsi2 = SimpleResultSetItem(session, id=0, recStore="recordStore",
                                    occs=3, **shared)
    self.rsi3 = SimpleResultSetItem(session, id=1, recStore="recordStore",
                                    occs=1, **shared)
    self.rsi4 = SimpleResultSetItem(session, id=0, recStore="recordStore2",
                                    occs=2, **shared)
    # Put identical (rsi1 and rsi2) into separate ResultSets
    first_items = [self.rsi1, self.rsi3]
    second_items = [self.rsi2, self.rsi4]
    self.a = SimpleResultSet(session, first_items, id="a")
    self.b = SimpleResultSet(session, second_items, id="b")
def __init__(self, session, name=None, manager=None, debug=0):
    """Initialise the process with its own Session copied from *session*.

    The new Session takes user, logger, database and environment from
    the given session but uses this process's name as its task. The
    server comes from the given session and the database object is
    fetched from that server.
    """
    # This sets self.name
    mp.Process.__init__(self, name=name)
    self.inPipe = None
    self.debug = debug
    self.manager = manager
    # Reconstruct our own session, so as to not overwrite task
    self.session = Session(user=session.user,
                           logger=session.logger,
                           task=self.name,
                           database=session.database,
                           environment=session.environment)
    self.session.server = session.server
    self.server = session.server
    self.database = self.server.get_object(self.session, session.database)

# NOTE(review): the original layout of this snippet was lost. The
# try/except below is assumed to be a sibling of __init__ (i.e. part of
# the enclosing class body) rather than code inside __init__ - confirm.
# It defines `name` as a property built from mp.Process.get_name and
# mp.Process.set_name, and does nothing when those attributes are absent.
try:
    name = property(mp.Process.get_name, mp.Process.set_name)
except AttributeError:
    pass
def setUp(self):
    """Build four ResultSetItems for the comparison tests.

    N.B. a == b, other pairs should not evaluate as equal
    """
    self.session = session = Session()
    # Keyword arguments that are the same for every item; only id and
    # recStore vary between them.
    shared = dict(occs=0, database="", diagnostic=None, weight=0.5,
                  resultSet=None, numeric=None)
    self.a = SimpleResultSetItem(session, id=0, recStore="recordStore",
                                 **shared)
    self.b = SimpleResultSetItem(session, id=0, recStore="recordStore",
                                 **shared)
    self.c = SimpleResultSetItem(session, id=1, recStore="recordStore",
                                 **shared)
    self.d = SimpleResultSetItem(session, id=0, recStore="recordStore2",
                                 **shared)
def test_sessionInstance(self):
    # Constructing Session with no arguments must yield a Session.
    created = Session()
    self.assertIsInstance(created, Session)
# from cheshire3.utils import reader from cheshire3.baseObjects import Session # Apache Config: #<Directory /usr/local/apache2/htdocs/srw> # SetHandler mod_python # PythonDebug On # PythonPath "['/home/cheshire/c3/code', '/usr/local/lib/python2.3/lib-dynload']+sys.path" # PythonHandler srwApacheHandler #</Directory> # NB. SetHandler, not AddHandler. cheshirePath = os.environ.get('C3HOME', '/home/cheshire') session = Session() session.environment = "apache" serv = SimpleServer( session, os.path.join(cheshirePath, 'cheshire3', 'configs', 'serverConfig.xml')) configs = {} serv._cacheDatabases(session) for db in serv.databases.values(): if db.get_setting(session, 'SRW') or db.get_setting(session, 'srw'): db._cacheProtocolMaps(session) map = db.protocolMaps.get('http://www.loc.gov/zing/srw/', None) map2 = db.protocolMaps.get('http://www.loc.gov/zing/srw/update/', None) configs[map.databaseUrl] = { 'http://www.loc.gov/zing/srw/': map, 'http://www.loc.gov/zing/srw/update/': map2
def test_sessionEnvironmentInit(self):
    # The environment passed to the constructor must be readable back.
    expected = "apache"
    sess = Session(environment="apache")
    self.assertEqual(sess.environment, expected)
def test_sessionDatabaseInit(self):
    # The database passed to the constructor must be readable back.
    expected = "db_test1"
    sess = Session(database="db_test1")
    self.assertEqual(sess.database, expected)
def directoryRecordStoreIter(store):
    """Yield each item of *store* after passing it through the store.

    Iterates the (identifier, data) pairs produced by
    directoryStoreIter and yields ``store._process_data(...)`` for each,
    using a single Session created when iteration starts.
    """
    sess = Session()
    for pair in directoryStoreIter(store):
        identifier, raw = pair
        yield store._process_data(sess, identifier, raw)
class Cheshire3Engine(BaseEngine):
    """Search engine back-end that stores and opens Cheshire3 databases.

    The session and server are created once, at class-definition time,
    and shared by all instances. ``self.databaseName`` and
    ``self.databasePath`` are read by the methods below but are not set
    in this class (presumably supplied by BaseEngine - confirm).
    """
    #schema = Schema(title=TEXT(stored=True), path=TEXT(stored=True), href=ID(stored=True), cfiBase=TEXT(stored=True), spinePos=TEXT(stored=True), content=TEXT)
    #database = 'db_tdo_simple_sru'

    # Appended verbatim to databasePath; note the leading slash, which is
    # why plain concatenation (not os.path.join) is used below.
    cheshire_metadata_dir = '/cheshire3-metadata'
    session = Session()
    serverConfig = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    server = SimpleServer(session, serverConfig)
    queryFactory = None
    db = None
    titleSel = None
    anywhereSel = None
    proxExtractor = None

    def __initializeTitleSelector(self):
        """Load 'titleXPathSelector', falling back to 'titleSelector'."""
        try:
            self.titleSel = self.db.get_object(self.session, 'titleXPathSelector')
        except ObjectDoesNotExistException:
            try:
                self.titleSel = self.db.get_object(self.session, 'titleSelector')
            except ObjectDoesNotExistException as e:
                print(e)

    def __initializeAnywhereSelector(self):
        """Load 'anywhereXPathSelector'; print the error if it is missing."""
        try:
            self.anywhereSel = self.db.get_object(self.session, 'anywhereXPathSelector')
        except ObjectDoesNotExistException as e:
            print(e)

    def __initializeProximityExtractor(self):
        """Load 'ProxExtractor'; print the error if it is missing."""
        try:
            self.proxExtractor = self.db.get_object(self.session, 'ProxExtractor')
        except ObjectDoesNotExistException as e:
            print(e)

    def __highlight(self, text, term, n):
        """Return one highlighted snippet per occurrence of term in text.

        Each snippet is up to n characters before the occurrence, the
        term wrapped in ``<b class="match term0">``, and up to n
        characters after it. The term is matched literally.

        Returns an empty list when term is empty or does not occur.
        """
        term_concordance = []
        if not term:
            # An empty pattern would match at every position.
            return term_concordance
        term_len = len(term)
        # Escape so that regex metacharacters in the term are matched as
        # plain text and every match is exactly term_len long.
        for match in re.finditer(re.escape(term), text):
            idx = match.start()
            # Clamp at 0: a negative slice start would count from the
            # end of the text and produce an empty/wrong left context.
            start = max(idx - n, 0)
            after = idx + term_len
            # Right context starts after the term so it is not repeated.
            term_concordance.append(text[start:idx] +
                                    '<b class="match term0">' + term + '</b>' +
                                    text[after:after + n])
        return term_concordance

    def open(self):
        """
        The Cheshire get_object line should throw an exception if it
        can't open passed db. Any such exception is printed, not raised.
        """
        try:
            self.db = self.server.get_object(self.session, self.databaseName)
            self.session.database = self.databaseName
        except Exception as e:
            print(e)
            print("openning database {} failed".format(self.databaseName))

    def create(self):
        """Create the database directory, its metadata file and the database.

        The metadata file is (re)written as an empty JSON object and
        ``cheshire3-init`` is then run through the shell.
        """
        if not os.path.exists(self.databasePath):
            os.makedirs(self.databasePath)
        # create cheshire metadata directory if needed, then initialize
        # the per-database metadata file with an empty JSON object
        metadata_path = self.databasePath + self.cheshire_metadata_dir
        if not os.path.exists(metadata_path):
            os.makedirs(metadata_path)
        with open(metadata_path + '/' + self.databaseName, 'w') as f:
            json.dump({}, f)
        try:
            print("openning database {} to create".format(self.databasePath))
            os.system("cheshire3-init " + self.databasePath +
                      " --database=" + self.databaseName)
        except Exception as e:
            print(e)
class Cheshire3Engine(BaseEngine):
    """Search engine back-end that indexes and queries Cheshire3 databases.

    The session and server are created once, at class-definition time,
    and shared by all instances. ``self.database_name`` and
    ``self.database_path`` are read by the methods below but are not set
    in this class (presumably supplied by BaseEngine - confirm).
    """
    #schema = Schema(title=TEXT(stored=True), path=TEXT(stored=True), href=ID(stored=True), cfiBase=TEXT(stored=True), spinePos=TEXT(stored=True), content=TEXT)
    #database = 'db_tdo_simple_sru'

    # Appended verbatim to database_path; note the leading slash, which
    # is why plain concatenation (not os.path.join) is used below.
    cheshire_metadata_dir = '/cheshire3-metadata'
    session = Session()
    serverConfig = os.path.join(cheshire3Root, 'configs', 'serverConfig.xml')
    server = SimpleServer(session, serverConfig)
    queryFactory = None
    db = None
    titleSel = None
    anywhereSel = None
    proxExtractor = None

    def __initializeTitleSelector(self):
        """Load 'titleXPathSelector', falling back to 'titleSelector'."""
        try:
            self.titleSel = self.db.get_object(self.session, 'titleXPathSelector')
        except ObjectDoesNotExistException:
            try:
                self.titleSel = self.db.get_object(self.session, 'titleSelector')
            except ObjectDoesNotExistException as e:
                logging.error(e)

    def __initializeAnywhereSelector(self):
        """Load 'anywhereXPathSelector'; log the error if it is missing."""
        try:
            self.anywhereSel = self.db.get_object(self.session, 'anywhereXPathSelector')
        except ObjectDoesNotExistException as e:
            logging.error(e)

    def __initializeProximityExtractor(self):
        """Load 'ProxExtractor'; log the error if it is missing."""
        try:
            self.proxExtractor = self.db.get_object(self.session, 'ProxExtractor')
        except ObjectDoesNotExistException as e:
            logging.error(e)

    def __highlight(self, text, term, n):
        """Return one highlighted snippet per occurrence of term in text.

        Each snippet is up to n characters before the occurrence, the
        term wrapped in ``<b class="match term0">``, and up to n
        characters after it. The term is matched literally.

        Returns an empty list when term is empty or does not occur.
        """
        term_concordance = []
        if not term:
            # An empty pattern would match at every position.
            return term_concordance
        term_len = len(term)
        # Escape so that regex metacharacters in the term are matched as
        # plain text and every match is exactly term_len long.
        for match in re.finditer(re.escape(term), text):
            idx = match.start()
            # Clamp at 0: a negative slice start would count from the
            # end of the text and produce an empty/wrong left context.
            start = max(idx - n, 0)
            after = idx + term_len
            # Right context starts after the term so it is not repeated.
            term_concordance.append(text[start:idx] +
                                    '<b class="match term0">' + term + '</b>' +
                                    text[after:after + n])
        return term_concordance

    def open(self):
        """
        The Cheshire get_object line should throw an exception if it
        can't open passed db. Any such exception is logged, not raised.
        """
        try:
            self.db = self.server.get_object(self.session, self.database_name)
            self.session.database = self.database_name
        except Exception as e:
            logging.error(e)
            logging.error("openning database {} failed".format(
                self.database_name))

    def create(self):
        """Create the database directory, its metadata file and the database.

        The metadata file is (re)written as an empty JSON object and
        ``cheshire3-init`` is then run through the shell.
        """
        if not os.path.exists(self.database_path):
            os.makedirs(self.database_path)
        # create cheshire metadata directory if needed, then initialize
        # the per-database metadata file with an empty JSON object
        metadata_path = self.database_path + self.cheshire_metadata_dir
        if not os.path.exists(metadata_path):
            os.makedirs(metadata_path)
        with open(metadata_path + '/' + self.database_name, 'w') as f:
            json.dump({}, f)
        try:
            logging.info("openning database {} to create".format(
                self.database_path))
            os.system("cheshire3-init " + self.database_path +
                      " --database=" + self.database_name)
        except Exception as e:
            logging.error(e)

    def add(self, path='', href='', title='', cfiBase='', spinePos=''):
        """Index the document at *path* and record its reader metadata.

        The document is loaded with ``cheshire3-load`` through the shell,
        then an entry keyed by *href* is merged into the database's JSON
        metadata file (which must already exist).
        """
        # first, index the document in cheshire3 using unix commands
        os.system("cheshire3-load --database=" + self.database_name + ' ' + path)
        doc_md = {
            href: {
                'path': path,
                'href': href,
                'title': title,
                'cfiBase': cfiBase,
                'spinePos': spinePos
            }
        }
        # title is not populated, so pulling filename from path prefix
        #filename = path[:path.find('/')] + '.json'
        metadata_file = (self.database_path + self.cheshire_metadata_dir +
                         '/' + self.database_name)
        # Read-modify-write: an existing entry for href is replaced.
        with open(metadata_file) as f_in:
            md_dict = json.load(f_in)
        md_dict.update(doc_md)
        with open(metadata_file, 'w') as f_out:
            json.dump(md_dict, f_out)
        #print "Current Path for directory writing: " + os.getcwd()

    def finished(self):
        """
        In Cheshire, there are no cleanup commands that are needed.
        The add command will index specified documents fully and end, so
        a finished command is not required.
        """
        pass

    def query(self, q, limit=None):
        """Run query *q* and return the metadata of every matching document.

        In Cheshire3, you have to specify an index and query, else it
        defaults the all index which utilizes simple extraction.

        Each returned dict is the stored metadata entry for the matching
        file with an added 'highlight' key holding the joined snippets
        for *q*. *limit* is accepted but currently not applied.
        """
        # Lazily resolve the Cheshire3 helper objects on first use.
        if self.queryFactory is None:
            self.queryFactory = self.db.get_object(self.session,
                                                   'defaultQueryFactory')
        if self.titleSel is None:
            self.__initializeTitleSelector()
        if self.anywhereSel is None:
            self.__initializeAnywhereSelector()
        if self.proxExtractor is None:
            self.__initializeProximityExtractor()

        c3Query = self.queryFactory.get_query(self.session, q)
        rs = self.db.search(self.session, c3Query)

        # open up the json file with reader specific attributes
        metadata_path = self.database_path + self.cheshire_metadata_dir
        with open(metadata_path + '/' + self.database_name) as f:
            db_md_dict = json.load(f)

        # loop through recordset, create new results list with dictionary
        # of found values
        results = []
        for rsi in rs:
            rec = rsi.fetch_record(self.session)
            # check the record titles
            titleData = self.titleSel.process_record(self.session, rec)
            # checking out the proximity attributes
            elems = self.anywhereSel.process_record(self.session, rec)
            extracted = self.proxExtractor.process_xpathResult(
                self.session, elems)
            # list() keeps this working where values() returns a view
            doc_dict = list(extracted.values())[0]
            concordance = self.__highlight(doc_dict['text'], q, 20)
            # extracts document name key
            # NOTE(review): titleData[3][0] is assumed to hold the
            # document's file path - confirm against the selector config.
            fn_key = os.path.basename(titleData[3][0])
            # append highlighted concordance to the dictionary
            db_md_dict[fn_key][u'highlight'] = " ".join(concordance)
            results.append(db_md_dict[fn_key])
        return results
def test_sessionEnvironmentDefault(self):
    # A Session built without arguments must report "terminal".
    expected = "terminal"
    sess = Session()
    self.assertEqual(sess.environment, expected)