def add_seeds(self):
    '''Cache every seed in the instance collection (self.boot_i).

    Each seed is a tab-separated string; its fields are stored under the
    keys arg1, arg2, ... and the document is tagged with iteration 0 and
    a reliability score of 1.0.
    '''
    self.logger.debug('add_seeds: %d %s' % (len(self.seeds), self.seeds))
    for seed in self.seeds:
        self.logger.debug('seed: %s' % seed)
        record = {}
        # fields are numbered starting at 1: arg1, arg2, ...
        for position, value in enumerate(seed.split('\t'), 1):
            record['arg%d' % position] = value
        # seeds belong to iteration 0 and are fully reliable
        record.update(it=0, score=1.0)
        mongodb.cache(self.db, self.boot_i, record)
def add_seeds(self):
    '''Write the seeds to the instance collection with score 1.0.

    A seed is split on tabs and its parts become the values of the keys
    arg1..argN of the stored document, which also gets it=0.
    '''
    self.logger.debug('add_seeds: %d %s' % (len(self.seeds), self.seeds))
    for seed in self.seeds:
        self.logger.debug('seed: %s' % seed)
        fields = seed.split('\t')
        # one argN key per tab-separated field, N counted from 1
        entry = dict(
            ('arg%d' % idx, field) for idx, field in enumerate(fields, 1)
        )
        entry['it'] = 0
        entry['score'] = 1.0
        mongodb.cache(self.db, self.boot_i, entry)
def iterate_p(self, mutexes=None):
    '''Perform one pattern iteration of bootstrapping.

    Reads the instances promoted in the previous iteration
    (self.it - 1), collects the patterns matching them, filters those
    with mutex_filter_p, ranks them by reliability score and caches the
    first self.n ranked patterns in the self.boot_p collection.
    Afterwards the collection is indexed on 'it' and on 'rel'.

    mutexes -- handed to mutex_filter_p unchanged; when omitted a fresh
               empty list is used for this call.
    '''
    # A literal [] default is a single list shared by every call; build a
    # new one per call so a callee mutating it cannot leak state.
    if mutexes is None:
        mutexes = []

    # connect lazily if no connection has been set up yet
    if not getattr(self, 'connection', None):
        self.init_connection()

    self.logger.info(' ### BOOTSTRAPPING PATTERN ITERATION: %d ###' % self.it)

    # read promoted instances of last bootstrapping iteration
    self.logger.info('getting promoted instances...')
    I = self.get_I(self.it - 1)
    self.logger.info('I: %d' % len(I))
    self.logger.info('getting promoted instances: done.')

    # find matching patterns
    self.logger.info('getting matching patterns...')
    P_ = self.I2P(I)
    P = self.mutex_filter_p(I, P_, mutexes)
    self.logger.info('getting matching patterns: done.')

    # rank patterns by reliability score
    self.logger.info('ranking patterns ...')
    rs = self.scorer.rank_patterns(I, P, self.it)
    self.logger.info('ranking patterns: done.')

    # save top n to the boot_p collection
    self.logger.info('saving top %d patterns...' % self.n)
    for r in rs[:self.n]:
        self.logger.info('r: %s' % r)
        mongodb.cache(self.db, self.boot_p, r)
    self.logger.info('saving top %d patterns: done.' % self.n)

    # create_index replaces ensure_index, which PyMongo deprecated in 3.0
    # and removed in 4.0; creating an already existing index is a no-op.
    self.logger.info('ensuring indices ...')
    # index for iteration number
    self.db[self.boot_p].create_index([('it', pymongo.DESCENDING)])
    # index for <REL>
    self.db[self.boot_p].create_index([('rel', pymongo.ASCENDING)])
    self.logger.info('ensuring indices: done.')
def iterate_i(self, mutexes=None):
    '''Perform one instance iteration of bootstrapping.

    Reads the patterns promoted for the current iteration (self.it),
    collects the instances matching them, filters those with
    mutex_filter_i, ranks them by reliability score and caches the first
    self.n ranked instances in the self.boot_i collection. Afterwards
    the collection is indexed on 'it' and on the fields in self.args.

    mutexes -- handed to mutex_filter_i unchanged; when omitted a fresh
               empty list is used for this call.
    '''
    # A literal [] default is a single list shared by every call; build a
    # new one per call so a callee mutating it cannot leak state.
    if mutexes is None:
        mutexes = []

    # connect lazily if no connection has been set up yet
    if not getattr(self, 'connection', None):
        self.init_connection()

    self.logger.info(' ### BOOTSTRAPPING INSTANCE ITERATION: %d ###' % self.it)

    # read promoted patterns of the current bootstrapping iteration
    self.logger.info('getting promoted patterns...')
    P = self.get_P(self.it)
    self.logger.info('P: %d' % len(P))
    self.logger.info('getting promoted patterns: done.')

    # find matching instances
    self.logger.info('getting matching instances...')
    I_ = self.P2I(P)
    I = self.mutex_filter_i(I_, P, mutexes)
    self.logger.info('getting matching instances: done.')

    # rank instances by reliability score
    self.logger.info('ranking instances ...')
    rs = self.scorer.rank_instances(I, P, self.it)
    self.logger.info('ranking instances: done.')

    # save top n to the boot_i collection
    self.logger.info('saving top %d instances...' % self.n)
    for r in rs[:self.n]:
        self.logger.info('r: %s' % r)
        mongodb.cache(self.db, self.boot_i, r)
    self.logger.info('saving top %d instances: done.' % self.n)

    # create_index replaces ensure_index, which PyMongo deprecated in 3.0
    # and removed in 4.0; creating an already existing index is a no-op.
    self.logger.info('ensuring indices ...')
    # index for iteration number
    self.db[self.boot_i].create_index([('it', pymongo.DESCENDING)])
    # compound index for <ARGJ,...,ARGN>
    self.db[self.boot_i].create_index(
        [(arg, pymongo.ASCENDING) for arg in self.args]
    )
    self.logger.info('ensuring indices: done.')
def iterate_i(self, mutexes=None):
    '''Run a bootstrapping iteration that promotes instances.

    Steps, in order:
      1. fetch the patterns promoted for iteration self.it (get_P)
      2. map them to matching instances (P2I) and apply mutex_filter_i
      3. rank the instances with self.scorer.rank_instances
      4. cache the first self.n ranked results in self.boot_i
      5. index self.boot_i on 'it' and on the fields in self.args

    mutexes -- forwarded to mutex_filter_i; None (the default) stands
               for a new empty list.
    '''
    # Avoid a mutable default: a shared [] would persist between calls.
    mutexes = [] if mutexes is None else mutexes

    # connect lazily if no connection has been set up yet
    if not getattr(self, 'connection', None):
        self.init_connection()

    info = self.logger.info
    info(' ### BOOTSTRAPPING INSTANCE ITERATION: %d ###' % self.it)

    # read promoted patterns of the current bootstrapping iteration
    info('getting promoted patterns...')
    P = self.get_P(self.it)
    info('P: %d' % len(P))
    info('getting promoted patterns: done.')

    # find matching instances
    info('getting matching instances...')
    I_ = self.P2I(P)
    I = self.mutex_filter_i(I_, P, mutexes)
    info('getting matching instances: done.')

    # rank instances by reliability score
    info('ranking instances ...')
    rs = self.scorer.rank_instances(I, P, self.it)
    info('ranking instances: done.')

    # save top n to the boot_i collection
    info('saving top %d instances...' % self.n)
    for r in rs[:self.n]:
        info('r: %s' % r)
        mongodb.cache(self.db, self.boot_i, r)
    info('saving top %d instances: done.' % self.n)

    # ensure_index was deprecated in PyMongo 3.0 and removed in 4.0;
    # create_index is the supported call and is harmless when the index
    # already exists.
    info('ensuring indices ...')
    coll = self.db[self.boot_i]
    # index for iteration number
    coll.create_index([('it', pymongo.DESCENDING)])
    # compound index for <ARGJ,...,ARGN>
    coll.create_index([(arg, pymongo.ASCENDING) for arg in self.args])
    info('ensuring indices: done.')
def iterate_p(self, mutexes=None):
    '''Run a bootstrapping iteration that promotes patterns.

    Steps, in order:
      1. fetch the instances promoted in iteration self.it - 1 (get_I)
      2. map them to matching patterns (I2P) and apply mutex_filter_p
      3. rank the patterns with self.scorer.rank_patterns
      4. cache the first self.n ranked results in self.boot_p
      5. index self.boot_p on 'it' and on 'rel'

    mutexes -- forwarded to mutex_filter_p; None (the default) stands
               for a new empty list.
    '''
    # Avoid a mutable default: a shared [] would persist between calls.
    mutexes = [] if mutexes is None else mutexes

    # connect lazily if no connection has been set up yet
    if not getattr(self, 'connection', None):
        self.init_connection()

    info = self.logger.info
    info(' ### BOOTSTRAPPING PATTERN ITERATION: %d ###' % self.it)

    # read promoted instances of last bootstrapping iteration
    info('getting promoted instances...')
    I = self.get_I(self.it - 1)
    info('I: %d' % len(I))
    info('getting promoted instances: done.')

    # find matching patterns
    info('getting matching patterns...')
    P_ = self.I2P(I)
    P = self.mutex_filter_p(I, P_, mutexes)
    info('getting matching patterns: done.')

    # rank patterns by reliability score
    info('ranking patterns ...')
    rs = self.scorer.rank_patterns(I, P, self.it)
    info('ranking patterns: done.')

    # save top n to the boot_p collection
    info('saving top %d patterns...' % self.n)
    for r in rs[:self.n]:
        info('r: %s' % r)
        mongodb.cache(self.db, self.boot_p, r)
    info('saving top %d patterns: done.' % self.n)

    # ensure_index was deprecated in PyMongo 3.0 and removed in 4.0;
    # create_index is the supported call and is harmless when the index
    # already exists.
    info('ensuring indices ...')
    coll = self.db[self.boot_p]
    # index for iteration number
    coll.create_index([('it', pymongo.DESCENDING)])
    # index for <REL>
    coll.create_index([('rel', pymongo.ASCENDING)])
    info('ensuring indices: done.')