def main(): db = Database('/tmp/tut_update') db.create() x_ind = WithXIndex(db.path, 'x') db.add_index(x_ind) # full examples so we had to add first the data # the same code as in previous step for x in xrange(100): db.insert(dict(x=x)) for y in xrange(100): db.insert(dict(y=y)) # end of insert part print db.count(db.all, 'x') for curr in db.all('x', with_doc=True): doc = curr['doc'] if curr['key'] % 7 == 0: db.delete(doc) elif curr['key'] % 5 == 0: doc['updated'] = True db.update(doc) print db.count(db.all, 'x') for curr in db.all('x', with_doc=True): print curr
def main(): db = Database('/tmp/tut1') db.create() for x in xrange(100): print db.insert(dict(x=x)) for curr in db.all('id'): print curr
class DBImport:
    """
    Import scan: scans an existing database and rebuilds the config file.
    Create db: creates the db file, master index, question index and table index.
    """

    def __init__(self, passkey, xtraDB):
        self.key = passkey
        self.dbName = xtraDB
        self.db = Database(self.dbName)
        self.importScan()

    def __del__(self):
        # Release the database handle even if the caller forgot to.
        if self.db.opened:
            self.db.close()

    # ADD REBUILD OPTION
    def importScan(self):
        """Re-create config entries from the stored 'master' row, as a check."""
        self.db = Database(self.dbName)
        if self.db.exists():
            self.db.open()
            self.db.id_ind.enc_key = self.key
            # The first 'master' passkey should be the only one present,
            # so the loop body effectively runs at most once.
            for row in self.db.all('id'):
                if row['t'] == 'master':
                    master_key = ''.join(row['_id'])
                    self.DBConfig = AppConfig()
                    self.DBConfig.putmap('databaseinfo', 'indexkey', master_key)
                    self.DBConfig.putmap('databaseinfo', 'databasename', self.dbName)
                    break
            # add else statement for errors if couldnt be written for found
            self.db.close()
        return True
def test_all(self, tmpdir, sh_nums):
    """Insert 10k docs through a sharded id index and verify all() returns each exactly once."""
    db = Database(str(tmpdir) + '/db')
    db.create(with_id_index=False)
    index_cls = globals()['ShardedUniqueHashIndex%d' % sh_nums]
    db.add_index(index_cls(db.path, 'id'))
    # Remember every generated _id so we can tick them off below.
    pending_ids = [db.insert(dict(x=i))['_id'] for i in xrange(10000)]
    for record in db.all('id'):
        pending_ids.remove(record['_id'])
    assert pending_ids == []
def recreate_db(chat_history_dir):
    """Rebuild the local chat-history DB by copying every record into a fresh
    database, keeping the old one in an 'orig' directory until success."""
    global _LocalStorage
    try:
        _LocalStorage.close()
    except Exception as exc:
        lg.warn('failed closing local storage : %r' % exc)
    _LocalStorage = None
    # Source database (the one being rebuilt).
    dbs = Database(chat_history_dir)
    dbs.custom_header = message_index.make_custom_header()
    # Clean out leftovers from any previous (possibly failed) rebuild.
    temp_dir = os.path.join(settings.ChatHistoryDir(), 'tmp')
    if os.path.isdir(temp_dir):
        bpio._dir_remove(temp_dir)
    orig_dir = os.path.join(settings.ChatHistoryDir(), 'orig')
    if os.path.isdir(orig_dir):
        bpio._dir_remove(orig_dir)
    # Target database, built in the temp directory.
    dbt = Database(temp_dir)
    dbt.custom_header = message_index.make_custom_header()
    source_opened = False
    try:
        dbs.open()
        source_opened = True
    except Exception as exc:
        lg.warn('failed open local storage : %r' % exc)
    # patch_flush_fsync(dbs)
    dbt.create()
    dbt.close()
    refresh_indexes(dbt, reindex=False)
    dbt.open()
    # patch_flush_fsync(dbt)
    if source_opened:
        # Copy every document; drop '_rev' so the target assigns fresh revisions.
        for c in dbs.all('id'):
            del c['_rev']
            dbt.insert(c)
    dbt.close()
    if source_opened:
        dbs.close()
    # Swap directories: keep the original as a backup, promote the rebuilt DB.
    os.rename(dbs.path, orig_dir)
    os.rename(dbt.path, dbs.path)
    _LocalStorage = Database(chat_history_dir)
    _LocalStorage.custom_header = message_index.make_custom_header()
    db().open()
    # patch_flush_fsync(db())
    if refresh_indexes(db(), rewrite=False, reindex=False):
        # Rebuild verified -- the backup copy is no longer needed.
        bpio._dir_remove(orig_dir)
        lg.info('local DB re-created in %r' % chat_history_dir)
    else:
        lg.err('local DB is broken !!!')
def migrate(source, destination):
    """
    Very basic for now: copy every index file except the id index from the
    source DB, then re-insert every document into the destination DB.
    """
    dbs = Database(source)
    dbt = Database(destination)
    dbs.open()
    dbt.create()
    dbt.close()
    source_indexes = os.path.join(dbs.path, "_indexes")
    target_indexes = os.path.join(dbt.path, "_indexes")
    for index_file in os.listdir(source_indexes):
        # The id index is created by dbt.create(); don't overwrite it.
        if index_file != "00id.py":
            shutil.copyfile(os.path.join(source_indexes, index_file),
                            os.path.join(target_indexes, index_file))
    dbt.open()
    for doc in dbs.all("id"):
        # Drop the revision so the target assigns its own.
        del doc["_rev"]
        dbt.insert(doc)
    return True
class DBImport:
    """
    Import scan: scan an existing database and rebuild the config file
    from the stored 'master' row.
    Create db: creates the db file, master index, question index and table index.
    """

    def __init__(self, passkey, xtraDB):
        self.key = passkey
        self.dbName = xtraDB
        self.db = Database(self.dbName)
        self.importScan()

    def __del__(self):
        if self.db.opened:
            self.db.close()

    # ADD REBUILD OPTION
    def importScan(self):
        # Read from config, as a check.
        self.db = Database(self.dbName)
        if not self.db.exists():
            return True
        self.db.open()
        self.db.id_ind.enc_key = self.key
        for doc in self.db.all('id'):
            # Only one master passkey should exist, so this matches at most once.
            if doc['t'] == 'master':
                masterKey = ''.join(doc['_id'])
                self.DBConfig = AppConfig()
                self.DBConfig.putmap('databaseinfo', 'indexkey', masterKey)  # masterkey=value
                self.DBConfig.putmap('databaseinfo', 'databasename', self.dbName)
                break
        # add else statement for errors if couldnt be written for found
        self.db.close()
        return True
def migrate(source, destination):
    """
    Very basic for now: copy every index file except the id index from the
    source DB, then re-insert every document into the destination DB.

    :param source: path of the existing database directory
    :param destination: path where the migrated database is created
    :returns: True on completion

    Fix: the original left both databases open forever (resource leak) --
    both handles are now closed, even if the copy loop raises.
    """
    dbs = Database(source)
    dbt = Database(destination)
    dbs.open()
    try:
        dbt.create()
        dbt.close()
        for curr in os.listdir(os.path.join(dbs.path, '_indexes')):
            # '00id.py' is recreated by dbt.create(); keep the fresh copy.
            if curr != '00id.py':
                shutil.copyfile(os.path.join(dbs.path, '_indexes', curr),
                                os.path.join(dbt.path, '_indexes', curr))
        dbt.open()
        try:
            for c in dbs.all('id'):
                # Drop the revision so the target assigns its own.
                del c['_rev']
                dbt.insert(c)
        finally:
            dbt.close()
    finally:
        dbs.close()
    return True
def main(): db = Database('/tmp/demo_secure') key = 'abcdefgh' id_ind = EncUniqueHashIndex(db.path, 'id', storage_class='Salsa20Storage') db.set_indexes([id_ind]) db.create() db.id_ind.enc_key = key for x in xrange(100): db.insert(dict(x=x, data='testing')) db.close() dbr = Database('/tmp/demo_secure') dbr.open() dbr.id_ind.enc_key = key for curr in dbr.all('id', limit=5): print curr
class Developer: def __init__(self, passkey, dbname=None): self.key = passkey if (dbname == None): self.DBConfig = AppConfig() self.dbName = self.DBConfig.mapget('databaseinfo')['databasename'] else: self.dbName = dbname self.db = Database(self.dbName) def dump(self): if (self.db.exists()): self.db.open() self.db.id_ind.enc_key = self.key for curr in self.db.all('id'): print curr self.db.close()
def main(): db = Database("/tmp/demo_secure") key = "abcdefgh" id_ind = EncUniqueHashIndex(db.path, "id") db.set_indexes([id_ind]) db.create() db.id_ind.enc_key = key print db.id_ind.storage for x in xrange(100): db.insert(dict(x=x, data="testing")) db.close() dbr = Database("/tmp/demo_secure") dbr.open() dbr.id_ind.enc_key = key for curr in dbr.all("id", limit=5): print curr
def main(): db2 = pickledb.load('examlple.db', True) db2.set('test', 'test') db = Database('/home/papaja/Zaloha/target/store.db') db.open() # db.create() # print database # x_ind = WithHashIndex(db.path, 'hash') # pointer_ind = WithHashIndex(db.path, 'pointer') # db.add_index(x_ind) # db.add_index(pointer_ind) # db.insert({'hash':'3f8ee76c84d95c3f4ed061db98694be57e7d33da', 'pointer':1}) # # for x in xrange(100): # db.insert(dict(x='3f8ee76c84d95c3f4ed061db98694be57e7d33da')) # for curr in db.all('id'): # curr['x'] = 1 # db.update(curr) # print curr for curr in db.all('id'): print curr try: test = db.get('hash', '3f8ee76c84d95c3f4ed061db98694be57e7d33da', with_doc=True) print test except RecordNotFound: print "Nieje rekord" exit() test['doc']['pointer'] = test['doc']['pointer'] + 1 db.update(test['doc']) for curr in db.all('id'): print curr exit() lstat = os.lstat("/home/papaja/.cache/keyring-SZ5Lrw/gpg") mode = lstat.st_mode if S_ISDIR(mode): print("dir") elif S_ISREG(mode): print("file") elif S_ISLNK(mode): print("link") else: print("None") print(mode) print(lstat) print(S_ISFIFO(mode)) exit() #print(os.readlink('/home/papaja/Zaloha/target/objects/test')) #shutil.move("/home/papaja/Zaloha/target/journal/objects/a3fe40b52ec03a7e2d8c8c0ca86baaf0192038c5.meta", "/home/papaja/Zaloha/target/objects") #shutil.rmtree(os.path.join("/home/papaja/", "objects")) # myFile = MyFile('/home/papaja/third') # print(myFile.readline().decode("UTF-8")) # dst = open('/home/mint/Diplomovka/first', 'wb') # src = open('second', 'rb') # synced = open('/home/papaja/third', 'wb') # signatureFile = open('signature', 'wb') # deltaFile = open('/home/papaja/delta', 'rb'); # hashes = pyrsync2.blockchecksums(dst) # hashes_save = { # weak: (index, strong) for index, (weak, strong) # in enumerate(hashes) # } # signature.write(bytes('gz\n', "UTF-8")) # pickle.dump(hashes_save, signature, pickle.HIGHEST_PROTOCOL) # type = signature.readline().decode("UTF-8") # print("Typ {}".format(type.strip())) # 
signature.readline() # hashes_save = pickle.load(signature) # print(hashes_save) # delta = pyrsync2.rsyncdelta(src, hashes_save) # pyrsync2.patchstream(dst, synced, delta) # io.FileIO # signature = librsync.signature(dst) # delta = librsync.delta(src, signature) # librsync.patch(dst, delta, synced) # synced.close() temp = tempfile.NamedTemporaryFile() skuska = open(temp.name, "wb") dst = open('/home/mint/Diplomovka/first', 'rb') velkost = open('/home/mint/Diplomovka/velkost', 'rb') retazec = 'ahoj' print(len(retazec)) print(velkost.readline()) print(velkost.read(3)) #velkost.write(str(sys.getsizeof(retazec))) dst_data = dst.read(16) while dst_data: skuska.write(dst_data) dst_data = dst.read(16) skuska.close() patchProcess = subprocess.Popen(['rdiff', 'patch', temp.name, '/home/mint/Diplomovka/delta'], stdout=subprocess.PIPE) patchFile, patchError = patchProcess.communicate() # print patchFile # dst_data = dst.read(16) while dst_data: #patchProcess.stdin.write(dst_data) dst_data = dst.read(16) # # patchProcess.stdin.write(dst_data) #patchProcess.stdin.write(dst_data) #patchProcess.stdin.close() # while True: # print('******') # patchData = patchProcess.stdout.read(16) # if patchData: # print(patchData) # else: # break dst.close()
class DayEntry:  # checker class
    """
    Checks the day hash or creates a new one. Once instantiated it checks:
    - whether the day key in config matches today's date
    - if there is no date in config, scans the database for today's row
    - if no (or the wrong) date is in config, a new row is made -- but only
      if no row with today's date exists anywhere in the database
    """

    def __init__(self, passkey):
        self.todayDate = str(getDayStart())
        self.key = passkey
        self.DBConfig = AppConfig()
        self.dayKey = None  # set up before checking, avoids AttributeError
        self.dbName = self.DBConfig.mapget('databaseinfo')['databasename']
        self.db = Database(self.dbName)
        try:
            self.dayKey = self.DBConfig.mapget('databaseinfo')['daykey']
        except KeyError:
            # Nothing in config: check the database for today's entry.
            daystatus = self.checkfordate()
            if daystatus == False:
                self.makeDayRow()
                self.DBConfig.putmap('databaseinfo', 'daykey', self.dayKey)
            # if True, nothing to do -- the config file is already correct
        else:
            # checkfordate() repairs the config when it finds today's row,
            # so re-read the key and only create a row if nothing changed.
            daystatus = self.checkfordate()
            oldcompare = self.dayKey
            self.dayKey = self.DBConfig.mapget('databaseinfo')['daykey']
            if (daystatus == False) & (oldcompare == self.dayKey):
                self.makeDayRow()
                self.DBConfig.putmap('databaseinfo', 'daykey', self.dayKey)
            if daystatus == True:
                pass  # everything is fine; nothing created, just a check

    def __del__(self):
        if self.db.opened:
            self.db.close()

    def makeDayRow(self):
        """Insert a new row for today unless one already exists."""
        if self.checkfordate() == True:
            return False  # already exists, no need to write
        dbindex = DBIndexSystem(self.key)
        dayrow = {"date": self.todayDate}
        if self.db.exists() == True:
            self.db.open()
            self.db.id_ind.enc_key = self.key
            self.db.insert(dayrow)
            self.db.close()  # must close first, no double opens
        self.getDayRowID()  # refresh the day key
        dbindex.TindexPut(self.dayKey)  # would normally write to config file
        return True

    def getDayRowID(self):
        """Look up today's row id by date; caches it in self.dayKey."""
        if self.db.exists():
            self.db.open()
            self.db.id_ind.enc_key = self.key
            for curr in self.db.all('id'):
                try:
                    if curr['date'] == str(self.todayDate):
                        # _id is returned as a list of characters and must be
                        # concatenated back into a string.
                        dataop = "".join(curr['_id'])
                        self.db.close()
                        self.dayKey = dataop
                        return dataop  # returns datestring
                except KeyError:
                    continue  # row without a 'date' field -- skip it
            # If it makes it here, the entry does not exist.
            self.db.close()
        return False  # there is a problem

    def checkfordate(self):
        """Check for the existence of today's date in the database."""
        if self.db.exists():
            self.db.open()
            self.db.id_ind.enc_key = self.key
            if self.dayKey != None:
                # NOTE(review): does not account for a config entry that
                # points at a record which no longer exists.
                dayrow = self.db.get('id', self.dayKey, with_doc=True)
                if dayrow['date'] == str(self.todayDate):
                    self.db.close()
                    return True
            for curr in self.db.all('id'):  # try to search
                try:
                    if curr['date'] == str(self.todayDate):
                        # Fix the lost entry in the config file.
                        self.DBConfig.putmap('databaseinfo', 'daykey',
                                             "".join(curr['_id']))
                        self.db.close()
                        return False
                except KeyError:
                    continue
            # Entry does not exist and nothing was remapped.
            self.db.close()
        return False
class DBSubsystem:
    """
    import scan: scans existing db and rebuilds config file
    create db: creates db file, master index, question index and table index
    """

    def __init__(self, passkey, xtraDB=None):
        self.DATABASE_SOFTWARE_VERSION = "0.3.1a"
        self.key = passkey
        self.DBConfig = AppConfig()
        self.dbval = xtraDB

    def __del__(self):
        if self.db.opened:
            self.db.close()

    # ADD REBUILD OPTION
    def createDB(self):
        """Create the database if the pre-flight check passes."""
        if self.creationCheck():
            self.buildDB()
            return True
        return False

    def creationCheck(self):
        """Only allow creation when no DB exists yet and a name was supplied."""
        if Integrity().checkExists() == False:
            if self.dbval != None:
                self.DBConfig.createConfig()
                self.DBConfig.putmap('databaseinfo', 'databasename', self.dbval)
                self.dbName = self.dbval
                return True
            return False
        # Integrity check says a database already exists.
        return False

    def buildDB(self):
        """Create the db file plus master, question and table indexes."""
        from _dbindex import EncUniqueHashIndex
        self.dbName = self.DBConfig.mapget('databaseinfo')['databasename']
        self.db = Database(self.dbName)
        id_ind = EncUniqueHashIndex(self.db.path, 'id')
        self.db.set_indexes([id_ind])
        self.db.create()
        self.db.id_ind.enc_key = self.key
        self.db.close()
        self.createMasterindex()  # create master index passkey, only once
        self.createQindex()
        self.createTindex()
        # add error handling
        return True

    # ------------------------------------------------------------------
    # Index Creation
    # ------------------------------------------------------------------

    def createMasterindex(self):
        """Insert the 'master' row and record its id in the config file."""
        if self.db.exists():
            self.db.open()
            self.db.id_ind.enc_key = self.key
            self.db.insert(dict(t='master', Qindex=None, Tindex=None,
                                DBVersion=self.DATABASE_SOFTWARE_VERSION))
            # Only one master passkey should exist, so this matches once.
            for curr in self.db.all('id'):
                if curr['t'] == 'master':
                    self.masterIndex = ''.join(curr['_id'])
                    self.DBConfig.putmap('databaseinfo', 'indexkey',
                                         self.masterIndex)  # masterkey=value
                    break
            # add else statement for errors if couldnt be written for found
            self.db.close()
        return self.masterIndex

    def createQindex(self):
        """Insert the question-index row and link it from the master row."""
        if self.db.exists():
            self.db.open()
            self.db.id_ind.enc_key = self.key
            self.db.insert(dict(t='Qindex'))
            # Find the freshly inserted Qindex row to learn its passkey.
            for curr in self.db.all('id'):
                if curr['t'] == 'Qindex':
                    self.Qindexkey = ''.join(curr['_id'])
                    break
            # add else statement for errors if couldnt be written for found
            # Write the Qindex passkey into the master index row.
            indexRow = self.db.get('id', self.masterIndex, with_doc=True)
            indexRow['Qindex'] = self.Qindexkey
            self.db.update(indexRow)
            self.db.close()

    def createTindex(self):
        """Insert the table-index row and link it from the master row."""
        self.dbName = self.DBConfig.mapget('databaseinfo')['databasename']
        self.masterIndex = self.DBConfig.mapget('databaseinfo')['indexkey']
        self.db = Database(self.dbName)
        if self.db.exists():
            self.db.open()
            self.db.id_ind.enc_key = self.key
            self.db.insert(dict(t='Tindex', table=[]))
            # Find the freshly inserted Tindex row to learn its passkey.
            for curr in self.db.all('id'):
                if curr['t'] == 'Tindex':
                    self.Tindexkey = ''.join(curr['_id'])
                    break
            # add else statement for errors if couldnt be written for found
            # Write the Tindex passkey into the master index row.
            indexRow = self.db.get('id', self.masterIndex, with_doc=True)
            indexRow['Tindex'] = self.Tindexkey
            self.db.update(indexRow)
            self.db.close()
class cache: """ cache for word morphological analysis """ def __init__(self, ): """ Create Analex Cache """ # use this dictionary as a local cache, # The global db will be updated on destructing object self.cache = {} self.db = Database('~/tmp/thaalibCache') if not self.db.exists(): self.db.create() x_ind = WithAIndex(self.db.path, 'a') self.db.add_index(x_ind) else: self.db.open() def __del__(self): """ Delete instance and clear cache """ self.cache = None self.db.close() def update(self): """update data base """ for word in self.cache: self.add_checked(word, self.cache[word]) def is_already_checked(self, word): try: return bool(self.db.get('a', word)) except: return False #~ except: return False; def get_checked(self, word): try: x = self.db.get('a', word, with_doc=True) y = x.get('doc', False) if y: return y.get('d', []) else: return [] except: return [] def add_checked(self, word, data): idata = {"a": word, 'd': data} try: saved = self.db.get('a', word, with_doc=True) except: saved = False if saved: saved['doc']['d'] = data doc = saved['doc'] doc['update'] = True self.db.update(doc) else: self.db.insert(idata) def exists_cache_word(self, word): """ test if word exists in cache""" #if exists in cache dictionary if word in self.cache: return True else: # test in database if self.is_already_checked(word): stored_data = self.get_checked(word) self.cache[word] = stored_data return bool(self.cache[word]) else: # add null dict to the word index to avoid multiple database check self.cache[word] = {} return {} def get_relation_freq(self, word_prev, word_cur, relation): self.exists_cache_word(word_prev) return self.cache.get(word_prev, {}).get(word_cur, {}).get(relation, 0) def is_related(self, word_prev, word_cur): """ test if two words are related""" #serach in cache self.exists_cache_word(word_prev) # if exists in cache or database return self.cache.get(word_prev, {}).get(word_cur, {}) def add_relation(self, word_prev, word_cur, relation): #~ relation 
='r'+str(relation) if word_prev not in self.cache: # test first that is in db cache if self.is_already_checked(word_prev): stored_data = self.get_checked(word_prev) self.cache[word_prev] = stored_data else: # create an new entry self.cache[word_prev] = { word_cur: { relation: 1, }, } # word_prev exists # add word_cur to previous dict elif word_cur not in self.cache[word_prev]: self.cache[word_prev][word_cur] = { relation: 1, } elif relation not in self.cache[word_prev][word_cur]: self.cache[word_prev][word_cur][relation] = 1 else: self.cache[word_prev][word_cur][relation] += 1 def display_all(self): """ display all contents of data base """ print "aranasyn.cache: dislay all records in Thaalib Database " "" for curr in self.db.all('a', with_doc=True): print curr['doc']['a'], arepr(curr['doc']['d'])
def get(self, key): return super(MultiIndex, self).get(key) def make_key_value(self, data): return data['l'], None if __name__ == '__main__': from CodernityDB.database import Database db = Database('/tmp/db_test') db.create() db.add_index(MultiIndex(db.path, 'multi')) for x in xrange(2): d = dict(l=range(10 * x, 10 * (x + 1))) db.insert(d) for curr in db.all('multi'): print curr for curr in db.all('id'): nl = map(lambda x: x * 10, curr['l']) curr['l'] = nl db.update(curr) for curr in db.all('multi'): print curr for curr in db.all('id'): nl = map(lambda x: x % 3, curr['l']) curr['l'] = nl print nl db.update(curr)
def main(): db2 = pickledb.load('examlple.db', True) db2.set('test', 'test') db = Database('/home/papaja/Zaloha/target/store.db') db.open() # db.create() # print database # x_ind = WithHashIndex(db.path, 'hash') # pointer_ind = WithHashIndex(db.path, 'pointer') # db.add_index(x_ind) # db.add_index(pointer_ind) # db.insert({'hash':'3f8ee76c84d95c3f4ed061db98694be57e7d33da', 'pointer':1}) # # for x in xrange(100): # db.insert(dict(x='3f8ee76c84d95c3f4ed061db98694be57e7d33da')) # for curr in db.all('id'): # curr['x'] = 1 # db.update(curr) # print curr for curr in db.all('id'): print curr try: test = db.get('hash', '3f8ee76c84d95c3f4ed061db98694be57e7d33da', with_doc=True) print test except RecordNotFound: print "Nieje rekord" exit() test['doc']['pointer'] = test['doc']['pointer'] + 1 db.update(test['doc']) for curr in db.all('id'): print curr exit() lstat = os.lstat("/home/papaja/.cache/keyring-SZ5Lrw/gpg") mode = lstat.st_mode if S_ISDIR(mode): print("dir") elif S_ISREG(mode): print("file") elif S_ISLNK(mode): print("link") else: print("None") print(mode) print(lstat) print(S_ISFIFO(mode)) exit() #print(os.readlink('/home/papaja/Zaloha/target/objects/test')) #shutil.move("/home/papaja/Zaloha/target/journal/objects/a3fe40b52ec03a7e2d8c8c0ca86baaf0192038c5.meta", "/home/papaja/Zaloha/target/objects") #shutil.rmtree(os.path.join("/home/papaja/", "objects")) # myFile = MyFile('/home/papaja/third') # print(myFile.readline().decode("UTF-8")) # dst = open('/home/mint/Diplomovka/first', 'wb') # src = open('second', 'rb') # synced = open('/home/papaja/third', 'wb') # signatureFile = open('signature', 'wb') # deltaFile = open('/home/papaja/delta', 'rb'); # hashes = pyrsync2.blockchecksums(dst) # hashes_save = { # weak: (index, strong) for index, (weak, strong) # in enumerate(hashes) # } # signature.write(bytes('gz\n', "UTF-8")) # pickle.dump(hashes_save, signature, pickle.HIGHEST_PROTOCOL) # type = signature.readline().decode("UTF-8") # print("Typ {}".format(type.strip())) # 
signature.readline() # hashes_save = pickle.load(signature) # print(hashes_save) # delta = pyrsync2.rsyncdelta(src, hashes_save) # pyrsync2.patchstream(dst, synced, delta) # io.FileIO # signature = librsync.signature(dst) # delta = librsync.delta(src, signature) # librsync.patch(dst, delta, synced) # synced.close() temp = tempfile.NamedTemporaryFile() skuska = open(temp.name, "wb") dst = open('/home/mint/Diplomovka/first', 'rb') velkost = open('/home/mint/Diplomovka/velkost', 'rb') retazec = 'ahoj' print(len(retazec)) print(velkost.readline()) print(velkost.read(3)) #velkost.write(str(sys.getsizeof(retazec))) dst_data = dst.read(16) while dst_data: skuska.write(dst_data) dst_data = dst.read(16) skuska.close() patchProcess = subprocess.Popen( ['rdiff', 'patch', temp.name, '/home/mint/Diplomovka/delta'], stdout=subprocess.PIPE) patchFile, patchError = patchProcess.communicate() # print patchFile # dst_data = dst.read(16) while dst_data: #patchProcess.stdin.write(dst_data) dst_data = dst.read(16) # # patchProcess.stdin.write(dst_data) #patchProcess.stdin.write(dst_data) #patchProcess.stdin.close() # while True: # print('******') # patchData = patchProcess.stdout.read(16) # if patchData: # print(patchData) # else: # break dst.close()
class cache : """ cache for word morphological analysis """ DB_PATH = os.path.join(os.path.expanduser('~'), '.thaalabCache') def __init__(self, cache_path=False): """ Create Analex Cache """ # use this dictionary as a local cache, # The global db will be updated on destructing object # get the database path if hasattr(sys, 'frozen'): # only when running in py2exe this exists base = sys.prefix else: # otherwise this is a regular python script base = os.path.dirname(os.path.realpath(__file__)) if not cache_path: file_path = self.DB_PATH else: file_path = os.path.join(os.path.dirname(cache_path), '.thaalabCache') self.cache={}; self.db = Database(file_path) if not self.db.exists(): self.db.create(); x_ind = WithAIndex(self.db.path, 'a') self.db.add_index(x_ind) else: self.db.open(); def __del__(self): """ Delete instance and clear cache """ self.cache=None; self.db.close(); def update(self): """update data base """ #~ pass for word in self.cache: self.add_checked(word, self.cache[word]) def is_already_checked(self, word): try: return bool(self.db.get('a', word)) except: return False #~ except: return False; def get_checked(self, word): try: x = self.db.get('a', word, with_doc=True) y = x.get('doc',False); if y: return y.get('d',[]) else: return [] except: return [] def add_checked(self, word, data): idata = {"a":word,'d':data} try: saved = self.db.get('a', word, with_doc=True) except: saved = False if saved: saved['doc']['d'] = data doc = saved['doc'] doc['update'] = True self.db.update(doc) else: self.db.insert(idata) def exists_cache_word(self, word): """ test if word exists in cache""" #if exists in cache dictionary if word in self.cache: return True else: # test in database if self.is_already_checked(word): stored_data = self.get_checked(word) self.cache[word] = stored_data return bool(self.cache[word]) else: # add null dict to the word index to avoid multiple database check self.cache[word] = {} return {} def get_relation_freq(self, word_prev, word_cur, 
relation): self.exists_cache_word(word_prev) return self.cache.get(word_prev, {}).get(word_cur, {}).get(relation, 0); def is_related(self, word_prev, word_cur): """ test if two words are related""" #serach in cache self.exists_cache_word(word_prev) # if exists in cache or database return self.cache.get(word_prev, {}).get(word_cur, {}); def add_relation(self, word_prev, word_cur, relation): #~ relation ='r'+str(relation) if word_prev not in self.cache: # test first that is in db cache if self.is_already_checked(word_prev): stored_data = self.get_checked(word_prev) self.cache[word_prev] = stored_data else: # create an new entry self.cache[word_prev] = {word_cur:{relation:1, }, } # word_prev exists # add word_cur to previous dict elif word_cur not in self.cache[word_prev]: self.cache[word_prev][word_cur] = {relation:1,} elif relation not in self.cache[word_prev][word_cur]: self.cache[word_prev][word_cur][relation] = 1 else: self.cache[word_prev][word_cur][relation] += 1 def display_all(self): """ display all contents of data base """ #~ pass print "aranasyn.cache: dislay all records in Thaalib Database """ for curr in self.db.all('a', with_doc=True): print curr['doc']['a'], arepr(curr['doc']['d'])